diff --git a/.gitignore b/.gitignore index 8b7e502..7337916 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ dist-ssr *.njsproj *.sln *.sw? + +.env diff --git a/README.md b/README.md index c082f61..2038f29 100644 --- a/README.md +++ b/README.md @@ -2,61 +2,10 @@ Mastermind is a programming language designed to compile to the esoteric language _Brainfuck_. -Brainfuck is essentially a modern interpretation of the classical Turing machine. It consists of a tape of 8-bit values, with simple increment/decrement, move left/right, and control flow operations. The full language only uses 8 control characters: `+-><.,[]`. +Brainfuck is essentially a modern interpretation of the classical Turing machine. It consists of an array (or _tape_) of 8-bit values, with simple increment/decrement, move left/right, and control flow operations. The full language only uses 8 control characters: `+-><.,[]`. -Imagine if C was designed for computer architectures that run Brainfuck directly, that is what Mastermind is intended to be. +Imagine an alternate reality where C was designed for computer architectures that run Brainfuck natively; that is what Mastermind is intended to be. -## Development and Setup +Mastermind language/compiler reference can be found here: [https://github.com/Heathcorp/Mastermind/blob/main/reference.md](https://github.com/Heathcorp/Mastermind/blob/main/reference.md) -### Quickstart: - -- Install Rust/Cargo and Node/NPM. -- Install Yarn: `npm i --global yarn`. -- Run `yarn`. -- Run `yarn build:wasm`. -- Run `yarn build:grammar`. -- Run `yarn dev`, then follow the link to http://localhost:5173. - -Commits to _dev_ and _main_ are published to https://staging.mastermind.lostpixels.org and https://mastermind.lostpixels.org respectively. - -### Overview: - -This repository contains two main components: the compiler and the web IDE. There are GitHub Actions workflows which build, test, and deploy the web IDE (with bundled compiler) to Firebase Web Hosting. 
- -#### Compiler - -The `./compiler` subdirectory contains a Cargo (Rust) package, ensure Rust is installed. - -The compiler codebase has two main entrypoints: `main.rs` and `lib.rs`, for the command-line and WASM compilation targets respectively. All other Rust source files are common between compilation targets. - -Key files to look at: - -- `tokeniser.rs`: tokenises the raw text files into Mastermind syntax tokens. -- `parser.rs`: parses strings of tokens into higher-level Mastermind clauses. -- `compiler.rs`: compiles the high-level clauses into a list of basic instructions akin to an intermediate representation (IR). -- `builder.rs`: takes the basic instructions from the compiler and builds the final Brainfuck program. - -Some key commands: - -(from within the `./compiler` subdirectory) - -- `cargo run -- -h`: runs the command-line compiler module and displays command help information -- `cargo test`: runs the automated test suite -- `cargo build`: builds the command-line module -- `wasm-pack build`: builds the WASM module - -#### Web IDE - -The project root directory `package.json`/`yarn.lock` defines a Node package managed with Yarn. Most important commands or behaviours are defined as `npm run` or `yarn` scripts within `package.json`. - -Ensure Node is installed, then ensure Yarn is installed with `npm i --global yarn`. - -The web IDE is a SolidJS app using TypeScript/TSX, and Vite as a bundler. The text editing portions of the UI are provided by the _codemirror_ plugin, and syntax highlighting is defined in the included _lezer_ grammar: `./src/lexer/mastermind.grammar`. 
- -Some key commands: - -- `yarn`: installs npm packages -- `yarn build:wasm`: builds the compiler WASM module -- `yarn build:grammar`: compiles the lezer grammar to JS for use in codemirror -- `yarn dev`: runs the SolidJS app in a local Vite dev server -- `yarn build`: builds the SolidJS app +Development guide can be found here: [https://github.com/Heathcorp/Mastermind/blob/main/devguide.md](https://github.com/Heathcorp/Mastermind/blob/main/devguide.md) diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs new file mode 100644 index 0000000..3aef49b --- /dev/null +++ b/compiler/src/backend/bf.rs @@ -0,0 +1,248 @@ +use super::common::{ + BrainfuckBuilder, BrainfuckBuilderData, BrainfuckProgram, CellAllocator, CellAllocatorData, + OpcodeVariant, TapeCellVariant, +}; +use crate::macros::macros::{r_assert, r_panic}; + +pub type TapeCell = i32; +impl TapeCellVariant for TapeCell { + fn origin_cell() -> TapeCell { + 0 + } + fn with_offset(&self, offset: i32) -> Self { + self + offset + } +} + +#[derive(Clone, Copy, Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Opcode { + Add, + Subtract, + Right, + Left, + OpenLoop, + CloseLoop, + Output, + Input, + Clear, +} + +impl OpcodeVariant for Opcode { + fn try_from_char(c: char) -> Option { + match c { + '+' => Some(Opcode::Add), + '-' => Some(Opcode::Subtract), + '>' => Some(Opcode::Right), + '<' => Some(Opcode::Left), + '[' => Some(Opcode::OpenLoop), + ']' => Some(Opcode::CloseLoop), + '.' 
=> Some(Opcode::Output), + ',' => Some(Opcode::Input), + _ => None, + } + } +} + +impl CellAllocator for CellAllocatorData { + /// Check if the desired number of cells can be allocated to the right of a given location + fn check_allocatable(&mut self, location: &TapeCell, size: usize) -> bool { + for k in 0..size { + if self.cells.contains(&(location + k as i32)) { + return false; + } + } + return true; + } + + /// Allocate size number of cells and return the location, optionally specify a location + fn allocate(&mut self, location: Option, size: usize) -> Result { + if let Some(l) = location { + if !self.check_allocatable(&l, size) { + r_panic!("Location specifier @{l} conflicts with another allocation"); + } + } + + // find free space + let mut region_start = location.unwrap_or(0); + for i in region_start.. { + if self.cells.contains(&i) { + region_start = i + 1; + } else if i - region_start == (size as i32 - 1) { + break; + } + } + + for i in region_start..(region_start + size as i32) { + r_assert!( + self.cells.insert(i), + "Unreachable error detected in cell allocation: allocate({location:?}, {size:?})" + ); + } + + Ok(region_start) + } + + /// Allocate a cell as close as possible to the given cell, + /// used for optimisations which need extra cells for efficiency + fn allocate_temp_cell(&mut self, location: TapeCell) -> TapeCell { + // alternate left then right, getting further and further out + let mut left_iter = (0..=location).rev(); + let mut right_iter = (location + 1)..; + loop { + if let Some(i) = left_iter.next() { + // unallocated cell, allocate it and return + if self.cells.insert(i) { + return i; + } + } + + if let Some(i) = right_iter.next() { + if self.cells.insert(i) { + return i; + } + } + } + } + + fn free(&mut self, cell: TapeCell, size: usize) -> Result<(), String> { + for i in cell..(cell + size as i32) { + r_assert!( + self.cells.remove(&i), + "Cannot free cell @{i} as it is not allocated.", + ); + } + + Ok(()) + } +} + +impl 
BrainfuckProgram for Vec<Opcode> {
+    /// Render the opcodes as Brainfuck source text.
+    /// `Opcode::Clear` is written as the three-character idiom `[-]`.
+    fn to_string(self) -> String {
+        let mut s = String::new();
+        self.into_iter().for_each(|o| {
+            s.push_str(match o {
+                Opcode::Add => "+",
+                Opcode::Subtract => "-",
+                Opcode::Right => ">",
+                Opcode::Left => "<",
+                Opcode::OpenLoop => "[",
+                Opcode::CloseLoop => "]",
+                Opcode::Output => ".",
+                Opcode::Input => ",",
+                Opcode::Clear => "[-]",
+            })
+        });
+        s
+    }
+
+    /// Parse Brainfuck source text into opcodes.
+    ///
+    /// The idiom `[-]` becomes a single `Opcode::Clear`; any character that is not a
+    /// Brainfuck symbol (including non-ASCII text) is skipped.
+    fn from_str(s: &str) -> Vec<Opcode> {
+        let mut ops = Vec::new();
+        // byte offset into `s`, always kept on a character boundary
+        let mut i = 0;
+        while i < s.len() {
+            let substr = &s[i..];
+            if substr.starts_with("[-]") {
+                ops.push(Opcode::Clear);
+                i += 3;
+            } else {
+                // `i < s.len()`, so `substr` is never empty here
+                let c = substr.chars().next().unwrap();
+                match c {
+                    '+' => ops.push(Opcode::Add),
+                    '-' => ops.push(Opcode::Subtract),
+                    '>' => ops.push(Opcode::Right),
+                    '<' => ops.push(Opcode::Left),
+                    '[' => ops.push(Opcode::OpenLoop),
+                    ']' => ops.push(Opcode::CloseLoop),
+                    '.' => ops.push(Opcode::Output),
+                    ',' => ops.push(Opcode::Input),
+                    _ => (), // could put a little special opcode in for other characters
+                }
+                // step over the whole character: advancing a single byte would leave `i`
+                // inside a multi-byte character and make the next `&s[i..]` panic
+                i += c.len_utf8();
+            }
+        }
+
+        ops
+    }
+}
+
+impl BrainfuckProgram for BrainfuckBuilderData {
+    /// Render the builder's opcodes as Brainfuck source text.
+    fn to_string(self) -> String {
+        self.opcodes.to_string()
+    }
+
+    /// Parse Brainfuck source text into a builder whose head position starts at cell 0.
+    fn from_str(s: &str) -> BrainfuckBuilderData {
+        BrainfuckBuilderData {
+            opcodes: Vec::from_str(s),
+            head_pos: 0,
+            // head_pos: TapeCell(0),
+        }
+    }
+}
+
+impl BrainfuckBuilder for BrainfuckBuilderData {
+    fn new() -> BrainfuckBuilderData {
+        BrainfuckBuilderData {
+            opcodes: Vec::new(),
+            head_pos: 0,
+        }
+    }
+    fn len(&self) -> usize {
+        self.opcodes.len()
+    }
+    fn push(&mut self, op: Opcode) {
+        self.opcodes.push(op);
+    }
+    fn extend(&mut self, ops: T)
+    where
+        T: IntoIterator,
+    {
+        self.opcodes.extend(ops);
+    }
+    fn move_to_cell(&mut self, cell: TapeCell) {
+        let x = cell;
+        let x_pos = self.head_pos;
+        //Move x level
+        if x_pos < x {
+            for _ in x_pos..x {
+                self.opcodes.push(Opcode::Right);
+            }
+        } else if x < x_pos {
+            // theoretically equivalent to cell..head_pos? 
+ for _ in ((x + 1)..=x_pos).rev() { + self.opcodes.push(Opcode::Left); + } + } + + self.head_pos = cell; + } + + fn add_to_current_cell(&mut self, imm: i8) { + if imm > 0 { + for _ in 0..imm { + self.opcodes.push(Opcode::Add); + } + } else if imm < 0 { + // needs to be i32 because -(-128) = -128 in i8-land + for _ in 0..-(imm as i32) { + self.opcodes.push(Opcode::Subtract); + } + } + } + + fn clear_current_cell(&mut self) { + self.opcodes.push(Opcode::OpenLoop); + self.opcodes.push(Opcode::Subtract); + self.opcodes.push(Opcode::CloseLoop); + } + fn output_current_cell(&mut self) { + self.opcodes.push(Opcode::Output); + } + fn input_to_current_cell(&mut self) { + self.opcodes.push(Opcode::Input); + } + fn open_loop(&mut self) { + self.opcodes.push(Opcode::OpenLoop); + } + fn close_loop(&mut self) { + self.opcodes.push(Opcode::CloseLoop); + } +} diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs new file mode 100644 index 0000000..f099f2c --- /dev/null +++ b/compiler/src/backend/bf2d.rs @@ -0,0 +1,401 @@ +use super::common::{ + BrainfuckBuilder, BrainfuckBuilderData, BrainfuckProgram, CellAllocator, CellAllocatorData, + OpcodeVariant, TapeCellVariant, +}; +use crate::macros::macros::{r_assert, r_panic}; + +use std::hash::Hash; + +#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy)] +pub struct TapeCell2D(pub i32, pub i32); +impl TapeCellVariant for TapeCell2D { + fn origin_cell() -> TapeCell2D { + TapeCell2D(0, 0) + } + fn with_offset(&self, offset: i32) -> Self { + TapeCell2D(self.0 + offset, self.1) + } +} + +#[derive(Clone, Copy, Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Opcode2D { + Add, + Subtract, + Right, + Left, + OpenLoop, + CloseLoop, + Output, + Input, + Clear, + Up, + Down, +} + +impl OpcodeVariant for Opcode2D { + fn try_from_char(c: char) -> Option { + match c { + '+' => Some(Opcode2D::Add), + '-' => Some(Opcode2D::Subtract), + '>' => Some(Opcode2D::Right), + '<' => Some(Opcode2D::Left), + '^' => Some(Opcode2D::Up), + 
'v' => Some(Opcode2D::Down),
+            '[' => Some(Opcode2D::OpenLoop),
+            ']' => Some(Opcode2D::CloseLoop),
+            '.' => Some(Opcode2D::Output),
+            ',' => Some(Opcode2D::Input),
+            _ => None,
+        }
+    }
+}
+
+impl BrainfuckProgram for Vec<Opcode2D> {
+    /// Render the opcodes as 2D Brainfuck source text.
+    /// `Opcode2D::Clear` is written as the three-character idiom `[-]`.
+    fn to_string(self) -> String {
+        let mut s = String::new();
+        self.into_iter().for_each(|o| {
+            s.push_str(match o {
+                Opcode2D::Add => "+",
+                Opcode2D::Subtract => "-",
+                Opcode2D::Right => ">",
+                Opcode2D::Left => "<",
+                Opcode2D::OpenLoop => "[",
+                Opcode2D::CloseLoop => "]",
+                Opcode2D::Output => ".",
+                Opcode2D::Input => ",",
+                Opcode2D::Clear => "[-]",
+                Opcode2D::Up => "^",
+                Opcode2D::Down => "v",
+            })
+        });
+        s
+    }
+
+    /// Parse 2D Brainfuck source text into opcodes.
+    ///
+    /// The idiom `[-]` becomes a single `Opcode2D::Clear`; any character that is not a
+    /// 2D Brainfuck symbol (including non-ASCII text) is skipped.
+    fn from_str(s: &str) -> Vec<Opcode2D> {
+        let mut ops = Vec::new();
+        // byte offset into `s`, always kept on a character boundary
+        let mut i = 0;
+        while i < s.len() {
+            let substr = &s[i..];
+            if substr.starts_with("[-]") {
+                ops.push(Opcode2D::Clear);
+                i += 3;
+            } else {
+                // `i < s.len()`, so `substr` is never empty here
+                let c = substr.chars().next().unwrap();
+                match c {
+                    '+' => ops.push(Opcode2D::Add),
+                    '-' => ops.push(Opcode2D::Subtract),
+                    '>' => ops.push(Opcode2D::Right),
+                    '<' => ops.push(Opcode2D::Left),
+                    '[' => ops.push(Opcode2D::OpenLoop),
+                    ']' => ops.push(Opcode2D::CloseLoop),
+                    '.' => ops.push(Opcode2D::Output),
+                    ',' => ops.push(Opcode2D::Input),
+                    '^' => ops.push(Opcode2D::Up),
+                    'v' => ops.push(Opcode2D::Down),
+                    _ => (), // could put a little special opcode in for other characters
+                }
+                // step over the whole character: advancing a single byte would leave `i`
+                // inside a multi-byte character and make the next `&s[i..]` panic
+                i += c.len_utf8();
+            }
+        }
+
+        ops
+    }
+}
+
+impl std::fmt::Display for TapeCell2D {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!("({}, {})", self.0, self.1))?;
+        Ok(())
+    }
+}
+
+// TODO: refactor
+impl CellAllocator for CellAllocatorData {
+    /// Check if the desired number of cells can be allocated to the right of a given location
+    fn check_allocatable(&mut self, location: &TapeCell2D, size: usize) -> bool {
+        for k in 0..size {
+            if self
+                .cells
+                .contains(&TapeCell2D(location.0 + k as i32, location.1))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /// Will either check a specific location can be allocated at the chosen size or if no location is
+    /// provided it will find a memory location where this size can be allocated
+    /// Uses a variety of memory allocation methods based on settings
+    fn allocate(
+        &mut self,
+        location: Option,
+        size: usize,
+    ) -> Result {
+        let mut region_start = location.unwrap_or(TapeCell2D(0, 0));
+        //Check specified memory allocation above to ensure that this works nicely with all algorithms
+        if let Some(l) = location {
+            if !self.check_allocatable(&l, size) {
+                r_panic!("Location specifier @{l} conflicts with another allocation");
+            }
+        } else {
+            // should the region start at the current tape head?
+            if self.config.memory_allocation_method == 0 {
+                for i in region_start.0.. 
{ + if self.cells.contains(&TapeCell2D(i, region_start.1)) { + region_start = TapeCell2D(i + 1, region_start.1); + } else if i - region_start.0 == (size as i32 - 1) { + break; + } + } + } else if self.config.memory_allocation_method == 1 { + //Zig Zag + let mut found = false; + let mut loops = 0; + let mut i; + let mut j; + while !found { + i = region_start.0 + loops; + j = region_start.1; + for _ in 0..=loops { + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + i = i - 1; + j = j + 1; + } + loops += 1; + } + } else if self.config.memory_allocation_method == 2 { + //Spiral + let mut found = false; + let mut loops = 1; + let directions = ['N', 'E', 'S', 'W']; + let mut i = region_start.0; + let mut j = region_start.1; + while !found { + for dir in directions { + match dir { + 'N' => { + for _ in 0..loops { + j += 1; + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + } + } + 'E' => { + for _ in 0..loops { + i += 1; + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + } + } + 'S' => { + for _ in 0..loops { + j -= 1; + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + } + } + 'W' => { + for _ in 0..loops { + i -= 1; + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + } + } + _ => {} + } + if found { + break; + } + } + if found { + break; + } + i -= 1; + j -= 1; + loops += 2; + } + } else if self.config.memory_allocation_method == 3 { + //Tiles + let mut found = false; + let mut loops = 0; + while !found { + for i in -loops..=loops { + for j in -loops..=loops { + if self.check_allocatable( + &TapeCell2D(region_start.0 + i, region_start.1 + j), + size, + ) { + found = true; + region_start = TapeCell2D(region_start.0 + i, 
region_start.1 + j); + break; + } + } + if found { + break; + } + } + loops += 1; + } + } else { + r_panic!( + "Memory allocation method {} not implemented.", + self.config.memory_allocation_method + ); + } + } + + // make all cells in the specified region allocated + for i in region_start.0..(region_start.0 + size as i32) { + if !self.cells.contains(&TapeCell2D(i, region_start.1)) { + self.cells.insert(TapeCell2D(i, region_start.1)); + } + } + + Ok(region_start) + } + + /// Allocate a cell as close as possible to the given cell, + /// used for optimisations which need extra cells for efficiency + fn allocate_temp_cell(&mut self, location: TapeCell2D) -> TapeCell2D { + // alternate left then right, getting further and further out + let mut left_iter = (0..=location.0).rev(); + let mut right_iter = (location.0 + 1)..; + loop { + if let Some(i) = left_iter.next() { + // unallocated cell, allocate it and return + if self.cells.insert(TapeCell2D(i, location.1)) { + return TapeCell2D(i, location.1); + } + } + + if let Some(i) = right_iter.next() { + if self.cells.insert(TapeCell2D(i, location.1)) { + return TapeCell2D(i, location.1); + } + } + } + } + + fn free(&mut self, cell: TapeCell2D, size: usize) -> Result<(), String> { + for i in cell.0..(cell.0 + size as i32) { + let c = TapeCell2D(i, cell.1); + r_assert!( + self.cells.remove(&c), + "Cannot free cell @{c} as it is not allocated." 
+ ); + } + + Ok(()) + } +} + +impl BrainfuckProgram for BrainfuckBuilderData { + fn to_string(self) -> String { + self.opcodes.to_string() + } + + fn from_str(s: &str) -> BrainfuckBuilderData { + BrainfuckBuilderData { + opcodes: Vec::from_str(s), + head_pos: TapeCell2D(0, 0), + } + } +} + +impl BrainfuckBuilder for BrainfuckBuilderData { + fn new() -> BrainfuckBuilderData { + BrainfuckBuilderData { + opcodes: Vec::new(), + head_pos: TapeCell2D(0, 0), + } + } + fn len(&self) -> usize { + self.opcodes.len() + } + fn push(&mut self, op: Opcode2D) { + self.opcodes.push(op); + } + fn extend(&mut self, ops: T) + where + T: IntoIterator, + { + self.opcodes.extend(ops); + } + fn move_to_cell(&mut self, cell: TapeCell2D) { + let x = cell.0; + let y = cell.1; + let x_pos = self.head_pos.0; + let y_pos = self.head_pos.1; + //Move x level + if x_pos < x { + for _ in x_pos..x { + self.opcodes.push(Opcode2D::Right); + } + } else if x < x_pos { + // theoretically equivalent to cell..head_pos? + for _ in ((x + 1)..=x_pos).rev() { + self.opcodes.push(Opcode2D::Left); + } + } + //Move y level + if y_pos < y { + for _ in y_pos..y { + self.opcodes.push(Opcode2D::Up); + } + } else if y < y_pos { + // theoretically equivalent to cell..head_pos? 
+ for _ in ((y + 1)..=y_pos).rev() { + self.opcodes.push(Opcode2D::Down); + } + } + self.head_pos = cell; + } + + fn add_to_current_cell(&mut self, imm: i8) { + if imm > 0 { + for _ in 0..imm { + self.opcodes.push(Opcode2D::Add); + } + } else if imm < 0 { + // needs to be i32 because -(-128) = -128 in i8-land + for _ in 0..-(imm as i32) { + self.opcodes.push(Opcode2D::Subtract); + } + } + } + + fn clear_current_cell(&mut self) { + self.opcodes.push(Opcode2D::OpenLoop); + self.opcodes.push(Opcode2D::Subtract); + self.opcodes.push(Opcode2D::CloseLoop); + } + fn output_current_cell(&mut self) { + self.opcodes.push(Opcode2D::Output); + } + fn input_to_current_cell(&mut self) { + self.opcodes.push(Opcode2D::Input); + } + fn open_loop(&mut self) { + self.opcodes.push(Opcode2D::OpenLoop); + } + fn close_loop(&mut self) { + self.opcodes.push(Opcode2D::CloseLoop); + } +} diff --git a/compiler/src/backend/common.rs b/compiler/src/backend/common.rs new file mode 100644 index 0000000..f772389 --- /dev/null +++ b/compiler/src/backend/common.rs @@ -0,0 +1,464 @@ +use super::constants_optimiser::calculate_optimal_addition; +use crate::{ + frontend::types::{CellLocation, Instruction, MemoryId}, + macros::macros::{r_assert, r_panic}, + misc::{MastermindConfig, MastermindContext}, + parser::types::TapeCellLocation, +}; + +use std::{ + collections::{HashMap, HashSet}, + num::Wrapping, +}; + +type LoopDepth = usize; +type TapeValue = u8; + +impl<'a> MastermindContext { + pub fn ir_to_bf( + &self, + instructions: Vec>, + return_to_cell: Option, + ) -> Result, String> + where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData: CellAllocator, + { + let mut allocator = CellAllocatorData::new(self.config.clone()); + + struct AllocationMapEntry { + cell_base: TC, + size: usize, + alloc_loop_depth: LoopDepth, + known_values: Vec>, + } + let mut alloc_map: HashMap> = HashMap::new(); + + let mut loop_stack: Vec = Vec::new(); + let mut current_loop_depth: LoopDepth = 0; + let mut 
skipped_loop_depth: Option = None; + let mut ops = BrainfuckBuilderData::new(); + + for instruction in instructions { + if let Some(depth) = skipped_loop_depth { + // current loop is being skipped because of unreachable loop optimisations + match instruction { + Instruction::OpenLoop(_) => { + current_loop_depth += 1; + } + Instruction::CloseLoop(_) => { + current_loop_depth -= 1; + if current_loop_depth == depth { + skipped_loop_depth = None; + } + } + _ => (), + } + continue; + } + match instruction { + // the ids (indices really) given by the compiler are guaranteed to be unique (at the time of writing) + // however they will absolutely not be very efficient if used directly as cell locations + Instruction::Allocate(memory, location_specifier) => { + let cell = allocator.allocate(location_specifier, memory.len())?; + let None = alloc_map.insert( + memory.id(), + AllocationMapEntry { + cell_base: cell, + size: memory.len(), + alloc_loop_depth: current_loop_depth, + known_values: vec![Some(0); memory.len()], + }, + ) else { + r_panic!("Attempted to reallocate memory {memory:#?}"); + }; + } + Instruction::AssertCellValue(cell_obj, imm) => { + let Some(AllocationMapEntry { + cell_base: _, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!( + "Attempted to assert value of cell {cell_obj:#?} \ +which could not be found" + ); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let known_value = &mut known_values[mem_idx]; + + // allow the user to assert that we don't know the value of the cell by clobbering when we do inline brainfuck + if *alloc_loop_depth == current_loop_depth || imm.is_none() { + *known_value = imm; + } else { + r_panic!( + "Cannot assert cell {cell_obj:#?} value \ +outside of loop it was allocated" + ); + } + } + Instruction::Free(id) => { + // TODO: do I need to check alloc loop depth here? 
Or are cells never freed in an inner scope? + // think about this in regards to reusing cell space when a cell isn't being used + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth: _, + known_values, + }) = alloc_map.remove(&id) + else { + r_panic!("Attempted to free memory id {id} which could not be found"); + }; + + let None = known_values + .into_iter() + .find_map(|known_value| (known_value.unwrap_or(1) != 0).then_some(())) + else { + r_panic!( + "Attempted to free memory id {id} which has unknown or non-zero values" + ); + }; + + allocator.free(cell_base, size)?; + } + Instruction::OpenLoop(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!( + "Attempted to open loop at cell {cell_obj:#?} which could not be found" + ); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + let mut open = true; + + if let Some(known_value) = known_value { + if *alloc_loop_depth == current_loop_depth + && *known_value == 0 && self.config.optimise_unreachable_loops + { + open = false; + skipped_loop_depth = Some(current_loop_depth); + current_loop_depth += 1; + } + } + + // skip the loop if the optimisations are turned on and we know the value is 0 + if open { + ops.move_to_cell(cell); + ops.open_loop(); + loop_stack.push(cell); + current_loop_depth += 1; + } + } + Instruction::CloseLoop(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!( + "Attempted to close loop at cell {cell_obj:#?} which could not be found" + ); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of 
allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + let Some(stack_cell) = loop_stack.pop() else { + r_panic!("Attempted to close un-opened loop"); + }; + r_assert!(cell == stack_cell, "Attempted to close a loop unbalanced"); + + current_loop_depth -= 1; + + ops.move_to_cell(cell); + ops.close_loop(); + + // if a loop finishes on a cell then it is guaranteed to be 0 based on brainfuck itself + // I did encounter issues with nested loops here, interesting + if current_loop_depth == *alloc_loop_depth { + *known_value = Some(0); + } + } + Instruction::AddToCell(cell_obj, imm) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!("Attempted to add to cell {cell_obj:#?} which could not be found"); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + // TODO: fix bug, if only one multiplication then we can have a value already in the cell, but never otherwise + + // not sure if these optimisations should be in the builder step or in the compiler + if self.config.optimise_constants { + // ops.move_to_cell(&mut head_pos, cell); + // here we use an algorithm that finds the best combo of products and constants to make the number to minimise bf code + // first we get the closest allocated cell so we can calculate the distance cost of multiplying + // TODO: instead find the nearest zero cell, doesn't matter if allocated or not + let temp_cell = allocator.allocate_temp_cell(cell); + + let optimised_ops = + calculate_optimal_addition(imm as i8, ops.head_pos, cell, temp_cell); + + ops.extend(optimised_ops.opcodes); + ops.head_pos = optimised_ops.head_pos; + + allocator.free(temp_cell, 1)?; + } else { + 
ops.move_to_cell(cell); + ops.add_to_current_cell(imm as i8); + } + + if imm != 0 { + if *alloc_loop_depth != current_loop_depth { + *known_value = None; + } else if let Some(known_value) = known_value { + *known_value = (Wrapping(*known_value) + Wrapping(imm)).0; + } + } + } + Instruction::InputToCell(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth: _, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!( + "Attempted to input to cell {cell_obj:#?} which could not be found" + ); + }; + + // TODO: refactor this duplicate code (get_cell_safe or something like that) + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + ops.move_to_cell(cell); + ops.input_to_current_cell(); + // no way to know at compile time what the input to the program will be + *known_value = None; + } + // Instruction::AssertCellValue(id, value) => {} + Instruction::ClearCell(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!("Attempted to clear cell {cell_obj:#?} which could not be found"); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + ops.move_to_cell(cell); + + let mut clear = true; + + if let Some(known_value) = known_value { + if self.config.optimise_cell_clearing + && *alloc_loop_depth == current_loop_depth + // not sure if this should be 4 or 3, essentially it depends on if we prefer clears or changes [-] vs ++--- + && (*known_value as i8).abs() < 4 + { + // let imm = *known_value as i8; + // if imm > 0 { + // for _ in 0..imm { 
+ // ops.push(Opcode2D::Subtract); + // } + // } else if imm < 0 { + // for _ in 0..-imm { + // ops.push(Opcode2D::Add); + // } + // } + ops.add_to_current_cell(-(*known_value as i8)); + clear = false; + } + } + + if clear { + ops.clear_current_cell(); + } + + if *alloc_loop_depth == current_loop_depth { + *known_value = Some(0); + } else { + // TODO: fix this for if statements + *known_value = None; + } + } + Instruction::OutputCell(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth: _, + known_values: _, + }) = alloc_map.get(&cell_obj.memory_id) + else { + r_panic!("Attempted to output cell {cell_obj:#?} which could not be found"); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + + ops.move_to_cell(cell); + ops.output_current_cell(); + } + Instruction::InsertBrainfuckAtCell(operations, location_specifier) => { + // move to the correct cell, based on the location specifier + match location_specifier { + CellLocation::FixedCell(cell) => ops.move_to_cell(cell.into()), + CellLocation::MemoryCell(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth: _, + known_values: _, + }) = alloc_map.get(&cell_obj.memory_id) + else { + r_panic!("Attempted to use location of cell {cell_obj:#?} which could not be found"); + }; + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + ops.move_to_cell(cell); + } + CellLocation::Unspecified => (), + } + + // paste the in-line BF operations + ops.extend(operations); + } + } + } + + // this is used in embedded brainfuck contexts to preserve head position + if let Some(origin_cell) = return_to_cell { + ops.move_to_cell(origin_cell.into()); + } + + Ok(ops.opcodes) + } +} + +/// This trait must be 
implemented for a cell location type for a Brainfuck variant
+/// for now this is implemented by TapeCell (i32 1D location specifier), and TapeCell2D (2D)
+pub trait TapeCellVariant
+where
+    Self: PartialEq + Copy + Clone + Eq + TapeCellLocation,
+{
+    /// The cell at the tape origin: `0` for `TapeCell`, `(0, 0)` for `TapeCell2D`.
+    fn origin_cell() -> Self;
+    /// This cell shifted by `offset`; `TapeCell2D` shifts only its first coordinate.
+    fn with_offset(&self, offset: i32) -> Self;
+}
+
+/// This trait must be implemented for a Brainfuck variant
+pub trait OpcodeVariant
+where
+    Self: Sized + Clone + Copy,
+{
+    /// Map a single source character to its opcode, or `None` when the character is not
+    /// a symbol of this variant.
+    fn try_from_char(c: char) -> Option;
+}
+
+/// State shared by the cell allocators: the cells currently allocated plus the compiler
+/// configuration (the 2D allocator reads `memory_allocation_method` from it).
+pub struct CellAllocatorData {
+    /// Cells that are currently allocated; inserted by the allocate methods, removed by `free`.
+    pub cells: HashSet,
+    pub config: MastermindConfig,
+}
+impl CellAllocatorData {
+    /// Create an allocator with no cells allocated.
+    fn new(config: MastermindConfig) -> CellAllocatorData {
+        CellAllocatorData {
+            cells: HashSet::new(),
+            config,
+        }
+    }
+}
+
+/// Allocation strategy for a tape-cell type; implemented on `CellAllocatorData` per variant.
+pub trait CellAllocator {
+    /// Check if `size` cells can be allocated to the right of `location`,
+    /// i.e. none of them is already allocated.
+    fn check_allocatable(&mut self, location: &TC, size: usize) -> bool;
+    /// Allocate `size` cells and return the start of the region. With `Some(location)` the
+    /// visible implementations return an error if that region conflicts with an existing allocation.
+    fn allocate(&mut self, location: Option, size: usize) -> Result;
+    /// Allocate a single cell as close as possible to `location`,
+    /// used for optimisations which need extra cells for efficiency.
+    fn allocate_temp_cell(&mut self, location: TC) -> TC;
+    /// Release `size` cells starting at `cell`; the visible implementations return an error
+    /// if any of them is not currently allocated.
+    fn free(&mut self, cell: TC, size: usize) -> Result<(), String>;
+}
+
+/// A Brainfuck program under construction.
+pub struct BrainfuckBuilderData {
+    /// Opcodes emitted so far, in program order.
+    pub opcodes: Vec,
+    /// Cell the builder considers the tape head to be on; set by `move_to_cell`.
+    pub head_pos: TC,
+}
+
+/// Operations for emitting opcodes while tracking the tape head position.
+pub trait BrainfuckBuilder {
+    /// Create an empty builder; the visible implementations start with the head at the origin cell.
+    fn new() -> Self;
+    /// Number of opcodes emitted so far.
+    fn len(&self) -> usize;
+    /// Append a single opcode.
+    fn push(&mut self, op: OC);
+    /// Append every opcode yielded by `ops`.
+    fn extend(&mut self, ops: T)
+    where
+        T: IntoIterator;
+    /// Emit the movement opcodes needed to go from the current head position to `cell`,
+    /// then record `cell` as the new head position.
+    fn move_to_cell(&mut self, cell: TC);
+    /// Emit `imm` add opcodes when `imm` is positive, or `-imm` subtract opcodes when negative.
+    fn add_to_current_cell(&mut self, imm: i8);
+    /// Emit the `[-]` idiom (open loop, subtract, close loop).
+    fn clear_current_cell(&mut self);
+    /// Emit an output opcode.
+    fn output_current_cell(&mut self);
+    /// Emit an input opcode.
+    fn input_to_current_cell(&mut self);
+    /// Emit an open-loop opcode.
+    fn open_loop(&mut self);
+    /// Emit a close-loop opcode.
+    fn close_loop(&mut self);
+}
+
+/// Conversion between a program representation and Brainfuck source text.
+pub trait BrainfuckProgram {
+    /// Render the program as Brainfuck source text.
+    fn to_string(self) -> String;
+    /// Parse Brainfuck source text into this representation.
+    fn from_str(s: &str) -> Self;
+}
diff --git a/compiler/src/constants_optimiser.rs b/compiler/src/backend/constants_optimiser.rs
similarity index 51%
rename from compiler/src/constants_optimiser.rs
rename to compiler/src/backend/constants_optimiser.rs
index 3ecf5ea..8284740 100644
--- a/compiler/src/constants_optimiser.rs
+++ b/compiler/src/backend/constants_optimiser.rs
@@ -1,5 +1,7 @@
-// TODO: 
make unit tests for this -use crate::builder::{BrainfuckCodeBuilder, Opcode, TapeCell}; +use super::common::{ + BrainfuckBuilder, BrainfuckBuilderData, CellAllocator, CellAllocatorData, OpcodeVariant, + TapeCellVariant, +}; // basically, most ascii characters are large numbers, which are more efficient to calculate with multiplication than with a bunch of + or - // an optimising brainfuck runtime will prefer a long string of +++++ or ----- however the goal of mastermind is to be used for code golf, which is not about speed @@ -8,42 +10,55 @@ use crate::builder::{BrainfuckCodeBuilder, Opcode, TapeCell}; // 7 * 4 : {>}(tricky)+++++++[<++++>-]< // 5 * 5 * 7 : +++++[>+++++<-]>[<+++++++>-]< -pub fn calculate_optimal_addition( +// TODO: make unit tests for this +pub fn calculate_optimal_addition( value: i8, - start_cell: TapeCell, - target_cell: TapeCell, - temp_cell: TapeCell, -) -> BrainfuckCodeBuilder { + start_cell: TC, + target_cell: TC, + temp_cell: TC, +) -> BrainfuckBuilderData +where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData: CellAllocator, +{ // can't abs() i8 directly because there is no +128i8, so abs(-128i8) crashes let abs_value = (value as i32).abs(); // STAGE 0: // for efficiency's sake, calculate the cost of just adding the constant to the cell - let solution_0 = { - let mut ops = BrainfuckCodeBuilder::new(); + let naive_solution = { + let mut ops = BrainfuckBuilderData::new(); ops.head_pos = start_cell; ops.move_to_cell(target_cell); ops.add_to_current_cell(value); ops }; - // https://esolangs.org/wiki/Brainfuck_constants + + // below 15 is pointless according to: https://esolangs.org/wiki/Brainfuck_constants if abs_value < 15 { - return solution_0; + return naive_solution; } // STAGE 1: // find best solution of form a * b + c let solution_1 = { - let mut previous_best: Vec<(usize, usize, usize)> = vec![(0, 0, 0)]; + // dynamic programming algorithm, although not generalised + // initialise so element 0 is also valid + let mut 
best_combinations: Vec<(usize, usize, usize)> = vec![(0, 0, 0)]; + // Loop until the target number, + // inner loop finds any (a, b)s where a * b = the iteration number i. + // Second inner loop finds c terms so that for each main iteration: + // there is some (a, b, c) where a * b + c = i. + // This finds the "cheapest" meaning the (a, b, c) where a + b + c is lowest. for i in 1..=(abs_value as usize) { - let mut cheapest: (usize, usize, usize) = (1, i, 0); + let mut current_best: (usize, usize, usize) = (1, i, 0); let mut j = 2; while j * j <= i { if i % j == 0 { let o = i / j; - if (j + o) < (cheapest.0 + cheapest.1) { - cheapest = (j, o, 0); + if (j + o) < (current_best.0 + current_best.1) { + current_best = (j, o, 0); } } @@ -52,22 +67,23 @@ pub fn calculate_optimal_addition( for j in 0..i { let diff = i - j; - let (a, b, c) = previous_best[j]; - if (a + b + c + diff) < (cheapest.0 + cheapest.1 + cheapest.2) { - cheapest = (a, b, c + diff); + let (a, b, c) = best_combinations[j]; + if (a + b + c + diff) < (current_best.0 + current_best.1 + current_best.2) { + current_best = (a, b, c + diff); } } - previous_best.push(cheapest); + best_combinations.push(current_best); } - let (a, b, c) = previous_best.into_iter().last().unwrap(); - let mut ops = BrainfuckCodeBuilder::new(); + assert_eq!(best_combinations.len(), (abs_value as usize) + 1); + let (a, b, c) = best_combinations.into_iter().last().unwrap(); + let mut ops = BrainfuckBuilderData::new(); ops.head_pos = start_cell; ops.move_to_cell(temp_cell); ops.add_to_current_cell(a as i8); - ops.push(Opcode::OpenLoop); + ops.open_loop(); ops.add_to_current_cell(-1); ops.move_to_cell(target_cell); if value < 0 { @@ -76,7 +92,7 @@ pub fn calculate_optimal_addition( ops.add_to_current_cell(b as i8); } ops.move_to_cell(temp_cell); - ops.push(Opcode::CloseLoop); + ops.close_loop(); ops.move_to_cell(target_cell); if value < 0 { ops.add_to_current_cell(-(c as i8)); @@ -93,9 +109,9 @@ pub fn calculate_optimal_addition( // 
compare best solutions - if solution_1.len() < solution_0.len() { + if solution_1.len() < naive_solution.len() { solution_1 } else { - solution_0 + naive_solution } } diff --git a/compiler/src/backend/mod.rs b/compiler/src/backend/mod.rs new file mode 100644 index 0000000..8f79770 --- /dev/null +++ b/compiler/src/backend/mod.rs @@ -0,0 +1,6 @@ +pub mod common; + +pub mod bf; +pub mod bf2d; + +mod constants_optimiser; diff --git a/compiler/src/brainfuck.rs b/compiler/src/brainfuck.rs index eae1b5b..ed8bf6f 100644 --- a/compiler/src/brainfuck.rs +++ b/compiler/src/brainfuck.rs @@ -3,34 +3,36 @@ use std::{ collections::HashMap, - fmt, io::{Read, Write}, num::Wrapping, }; -use crate::macros::macros::r_panic; +use crate::{ + backend::{bf2d::TapeCell2D, common::TapeCellVariant}, + macros::macros::r_panic, +}; use wasm_bindgen::{JsCast, JsValue}; use wasm_bindgen_futures::JsFuture; -struct Tape { - memory_map: HashMap<(i32, i32), Wrapping>, - head_position: (i32, i32), +struct Tape { + memory_map: HashMap>, + head_position: TC, } -impl Tape { +impl Tape { fn new() -> Self { Tape { memory_map: HashMap::new(), - head_position: (0, 0), + head_position: TapeCell2D(0, 0), } } - fn get_cell(&self, position: (i32, i32)) -> Wrapping { + fn get_cell(&self, position: TapeCell2D) -> Wrapping { match self.memory_map.get(&position) { Some(val) => *val, None => Wrapping(0), } } - fn move_head_position(&mut self, amount: (i32, i32)) { + fn move_head_position(&mut self, amount: TapeCell2D) { self.head_position.0 += amount.0; self.head_position.1 += amount.1; } @@ -57,78 +59,13 @@ impl Tape { } } -impl fmt::Display for Tape { - // absolutely horrible code here, not even used ever so should just get rid of it - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut line_0 = String::with_capacity(50); - let mut line_1 = String::with_capacity(50); - let mut line_2 = String::with_capacity(50); - let mut line_3 = String::with_capacity(50); - let mut line_4 = 
String::with_capacity(50); - - // disgusting - line_0.push('|'); - line_1.push('|'); - line_2.push('|'); - line_3.push('|'); - line_4.push('|'); - - for pos in (self.head_position.1 - 10)..(self.head_position.1 + 10) { - let val = self.get_cell((pos, 0)).0; - let mut dis = 32u8; - if val.is_ascii_alphanumeric() || val.is_ascii_punctuation() { - dis = val; - } - - // dodgy af, I don't know rust or the best way but I know this isn't - line_0.push_str(format!("{val:03}").as_str()); - - line_1.push_str(format!("{:3}", (val as i8)).as_str()); - - line_2.push_str(format!(" {val:02x}").as_str()); - - line_3.push(' '); - line_3.push(' '); - line_3.push(dis as char); - - line_4 += match pos == self.head_position.1 { - true => "^^^", - false => "---", - }; - - line_0.push('|'); - line_1.push('|'); - line_2.push('|'); - line_3.push('|'); - line_4.push('|'); - } - - // disgusting but I just want this to work - let _ = f.write_str("\n"); - let _ = f.write_str(&line_0); - let _ = f.write_str("\n"); - let _ = f.write_str(&line_1); - let _ = f.write_str("\n"); - let _ = f.write_str(&line_2); - let _ = f.write_str("\n"); - let _ = f.write_str(&line_3); - let _ = f.write_str("\n"); - let _ = f.write_str(&line_4); - let _ = f.write_str("\n"); - - Ok(()) - } -} - -pub struct BVMConfig { +pub struct BrainfuckConfig { pub enable_debug_symbols: bool, pub enable_2d_grid: bool, } -pub struct BVM { - config: BVMConfig, - tape: Tape, - program: Vec, +pub struct BrainfuckContext { + pub config: BrainfuckConfig, } pub trait AsyncByteReader { @@ -139,54 +76,34 @@ pub trait ByteWriter { fn write_byte(&mut self, byte: u8); } -impl BVM { +impl BrainfuckContext { const MAX_STEPS_DEFAULT: usize = (2 << 30) - 2; - pub fn new(config: BVMConfig, program: Vec) -> Self { - BVM { - config, - tape: Tape::new(), - program, - } - } - // TODO: refactor/rewrite this, can definitely be improved with async read/write traits or similar // I don't love that I duplicated this to make it work with js // TODO: this 
isn't covered by unit tests - // TODO: add a maximum step count pub async fn run_async( - &mut self, + &self, + program: Vec, output_callback: &js_sys::Function, input_callback: &js_sys::Function, ) -> Result { + let mut tape = Tape::new(); let mut pc: usize = 0; // this could be more efficient with a pre-computed map let mut loop_stack: Vec = Vec::new(); let mut output_bytes: Vec = Vec::new(); - while pc < self.program.len() { + while pc < program.len() { match ( - self.program[pc], + program[pc], self.config.enable_debug_symbols, self.config.enable_2d_grid, ) { - ('+', _, _) => self.tape.increment_current_cell(Wrapping(1)), - ('-', _, _) => self.tape.increment_current_cell(Wrapping(-1i8 as u8)), + ('+', _, _) => tape.increment_current_cell(Wrapping(1)), + ('-', _, _) => tape.increment_current_cell(Wrapping(-1i8 as u8)), (',', _, _) => { - // https://github.com/rustwasm/wasm-bindgen/issues/2195 - // let password_jsval: JsValue = func.call1(&this, &JsValue::from_bool(true))?; - // let password_promise_res: Result = - // password_jsval.dyn_into(); - // let password_promise = password_promise_res - // .map_err(|_| "Function askUnlockPassword does not return a Promise") - // .map_err(err_to_js)?; - // let password_jsstring = JsFuture::from(password_promise).await?; - // let password = password_jsstring - // .as_string() - // .ok_or("Promise didn't return a String") - // .map_err(err_to_js)?; - // TODO: handle errors let jsval = input_callback .call0(&JsValue::null()) @@ -202,11 +119,11 @@ impl BVM { .as_f64() .expect("Could not convert js number into f64 type"); let byte: u8 = num as u8; // I have no idea if this works (TODO: test) - self.tape.set_current_cell(Wrapping(byte)); + tape.set_current_cell(Wrapping(byte)); } ('.', _, _) => { // TODO: handle errors - let byte = self.tape.get_current_cell().0; + let byte = tape.get_current_cell().0; let fnum: f64 = byte as f64; // I have no idea if this works (TODO: test again) output_callback .call1(&JsValue::null(), 
&JsValue::from_f64(fnum)) @@ -215,20 +132,20 @@ impl BVM { output_bytes.push(byte); } ('>', _, _) => { - self.tape.move_head_position((1, 0)); + tape.move_head_position(TapeCell2D(1, 0)); } ('<', _, _) => { - self.tape.move_head_position((-1, 0)); + tape.move_head_position(TapeCell2D(-1, 0)); } ('[', _, _) => { // entering a loop - if self.tape.get_current_cell().0 == 0 { + if tape.get_current_cell().0 == 0 { // skip the loop, (advance to the corresponding closing loop brace) // TODO: make this more efficient by pre-computing a loops map let mut loop_count = 1; while loop_count > 0 { pc += 1; - loop_count += match self.program[pc] { + loop_count += match program[pc] { '[' => 1, ']' => -1, _ => 0, @@ -240,7 +157,7 @@ impl BVM { } } (']', _, _) => { - if self.tape.get_current_cell().0 == 0 { + if tape.get_current_cell().0 == 0 { // exit the loop loop_stack.pop(); } else { @@ -249,8 +166,8 @@ impl BVM { pc = loop_stack[loop_stack.len() - 1]; } } - ('^', _, true) => self.tape.move_head_position((0, 1)), - ('v', _, true) => self.tape.move_head_position((0, -1)), + ('^', _, true) => tape.move_head_position(TapeCell2D(0, 1)), + ('v', _, true) => tape.move_head_position(TapeCell2D(0, -1)), ('^', _, false) => { r_panic!("2D Brainfuck currently disabled"); } @@ -258,10 +175,10 @@ impl BVM { r_panic!("2D Brainfuck currently disabled"); } // ('#', true, ) => { - // println!("{}", self.tape); + // println!("{}", tape); // } // ('@', true, _) => { - // print!("{}", self.tape.get_current_cell().0 as i32); + // print!("{}", tape.get_current_cell().0 as i32); // } _ => (), }; @@ -271,7 +188,7 @@ impl BVM { // .iter() // .collect(); // println!("{s}"); - // println!("{}", self.tape); + // println!("{}", tape); pc += 1; } @@ -279,48 +196,50 @@ impl BVM { } pub fn run( - &mut self, + &self, + program: Vec, input: &mut impl Read, output: &mut impl Write, max_steps: Option, ) -> Result<(), String> { + let mut tape = Tape::new(); let mut steps = 0usize; let mut pc: usize = 0; // this 
could be more efficient with a pre-computed map let mut loop_stack: Vec = Vec::new(); - while pc < self.program.len() { + while pc < program.len() { match ( - self.program[pc], + program[pc], self.config.enable_debug_symbols, self.config.enable_2d_grid, ) { - ('+', _, _) => self.tape.increment_current_cell(Wrapping(1)), - ('-', _, _) => self.tape.increment_current_cell(Wrapping(-1i8 as u8)), + ('+', _, _) => tape.increment_current_cell(Wrapping(1)), + ('-', _, _) => tape.increment_current_cell(Wrapping(-1i8 as u8)), (',', _, _) => { let mut buf = [0; 1]; let _ = input.read_exact(&mut buf); - self.tape.set_current_cell(Wrapping(buf[0])); + tape.set_current_cell(Wrapping(buf[0])); } ('.', _, _) => { - let buf = [self.tape.get_current_cell().0]; + let buf = [tape.get_current_cell().0]; let _ = output.write(&buf); } ('>', _, _) => { - self.tape.move_head_position((1, 0)); + tape.move_head_position(TapeCell2D(1, 0)); } ('<', _, _) => { - self.tape.move_head_position((-1, 0)); + tape.move_head_position(TapeCell2D(-1, 0)); } ('[', _, _) => { // entering a loop - if self.tape.get_current_cell().0 == 0 { + if tape.get_current_cell().0 == 0 { // skip the loop, (advance to the corresponding closing loop brace) // TODO: make this more efficient by pre-computing a loops map let mut loop_count = 1; while loop_count > 0 { pc += 1; - loop_count += match self.program[pc] { + loop_count += match program[pc] { '[' => 1, ']' => -1, _ => 0, @@ -332,7 +251,7 @@ impl BVM { } } (']', _, _) => { - if self.tape.get_current_cell().0 == 0 { + if tape.get_current_cell().0 == 0 { // exit the loop loop_stack.pop(); } else { @@ -341,8 +260,8 @@ impl BVM { pc = loop_stack[loop_stack.len() - 1]; } } - ('^', _, true) => self.tape.move_head_position((0, 1)), - ('v', _, true) => self.tape.move_head_position((0, -1)), + ('^', _, true) => tape.move_head_position(TapeCell2D(0, 1)), + ('v', _, true) => tape.move_head_position(TapeCell2D(0, -1)), ('^', _, false) => { r_panic!("2D Brainfuck currently 
disabled"); } @@ -350,10 +269,10 @@ impl BVM { r_panic!("2D Brainfuck currently disabled"); } // '#' => { - // println!("{}", self.tape); + // println!("{}", tape); // } // '@' => { - // print!("{}", self.tape.get_current_cell().0 as i32); + // print!("{}", tape.get_current_cell().0 as i32); // } _ => (), }; @@ -363,7 +282,7 @@ impl BVM { // .iter() // .collect(); // println!("{s}"); - // println!("{}", self.tape); + // println!("{}", tape); pc += 1; // cut the program short if it runs forever @@ -381,185 +300,261 @@ impl BVM { } #[cfg(test)] -pub mod tests { +pub mod bvm_tests { // TODO: add unit tests for Tape use super::*; use std::io::Cursor; pub fn run_code( - config: BVMConfig, - code: String, - input: String, + config: BrainfuckConfig, + code: &str, + input: &str, max_steps_cutoff: Option, - ) -> String { - let mut bvm = BVM::new(config, code.chars().collect()); + ) -> Result { + let ctx = BrainfuckContext { config }; let input_bytes: Vec = input.bytes().collect(); let mut input_stream = Cursor::new(input_bytes); - let mut output_stream = Cursor::new(Vec::new()); + let mut output_stream = Cursor::new(vec![]); - bvm.run(&mut input_stream, &mut output_stream, max_steps_cutoff) - .unwrap(); + ctx.run( + code.chars().collect(), + &mut input_stream, + &mut output_stream, + max_steps_cutoff, + )?; // TODO: fix this unsafe stuff - unsafe { String::from_utf8_unchecked(output_stream.into_inner()) } + Ok(unsafe { String::from_utf8_unchecked(output_stream.into_inner()) }) } - const BVM_CONFIG_1D: BVMConfig = BVMConfig { + const BVM_CONFIG_1D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, enable_2d_grid: false, }; - const BVM_CONFIG_2D: BVMConfig = BVMConfig { + const BVM_CONFIG_2D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, enable_2d_grid: true, }; - #[test] - fn dummy_test() { - let program = String::from(""); - let input = String::from(""); - let desired_output = String::from(""); - assert_eq!( - desired_output, - 
run_code(BVM_CONFIG_1D, program, input, None) - ) - } - #[test] fn hello_world_1() { - let program = String::from("++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."); - let input = String::from(""); - let desired_output = String::from("Hello World!\n"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, program, input, None) - ) + run_code( + BVM_CONFIG_1D, + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.\ ++++.------.--------.>>+.>++.", + "", + None + ) + .unwrap(), + "Hello World!\n" + ); } #[test] fn hello_world_2() { - let program = String::from( - "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+.", - ); - let input = String::from(""); - let desired_output = String::from("Hello, World!"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, program, input, None) + run_code( + BVM_CONFIG_1D, + "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+.", + "", + None + ) + .unwrap(), + "Hello, World!" 
) } #[test] fn random_mess() { - let program = String::from("+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++."); - let input = String::from(""); - let desired_output = String::from("eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfijgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n"); + // test case stolen from https://code.golf assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, program, input, None) + run_code( + BVM_CONFIG_1D, + "+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+\ +.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++.", + "", + None + ) + .unwrap(), + "eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfi\ +jgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n" ) } #[test] - #[should_panic(expected = "2D Brainfuck currently disabled")] fn grid_disabled_1() { - let program = String::from("++++++++[->++++++[->+>+<<]<]>>.>^+++."); - let input = String::from(""); - run_code(BVM_CONFIG_1D, program, input, None); + assert_eq!( + run_code( + BVM_CONFIG_1D, + "++++++++[->++++++[->+>+<<]<]>>.>^+++.", + "", + None, + ) + .unwrap_err(), + "2D Brainfuck currently disabled" + ); } #[test] - #[should_panic(expected = "2D Brainfuck currently disabled")] fn grid_disabled_2() { - let program = - String::from("++++++++[->^^^+++vvvv+++[->^^^^+>+^^^^^^^^>.>vvvv+++."); - let input = String::from(""); - run_code(BVM_CONFIG_1D, program, input, None); + assert_eq!( + run_code( + BVM_CONFIG_1D, + "++++++++[->^^^+++vvvv+++[->^^^^+>+^^^^^^^^>.>vvvv+++.", + "", + None, + ) + .unwrap_err(), + "2D Brainfuck currently disabled" + ); } // 2D tests: #[test] fn grid_regression_1() { - // hello world - let program = String::from("++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."); - let input = String::from(""); - let desired_output = 
String::from("Hello World!\n"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.\ ++++.------.--------.>>+.>++.", + "", + None + ) + .unwrap(), + "Hello World!\n" ) } #[test] fn grid_regression_2() { - // random mess - let program = String::from("+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++."); - let input = String::from(""); - let desired_output = String::from("eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfijgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n"); + // test case stolen from https://code.golf assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+\ +.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++.", + "", + None + ) + .unwrap(), + "eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfi\ +jgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n" ) } #[test] fn grid_basic_1() { - let program = String::from("++++++++[-^++++++[->+v+<^]v]>+++++^.v."); - let input = String::from(""); - let desired_output = String::from("05"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "++++++++[-^++++++[->+v+<^]v]>+++++^.v.", + "", + None + ) + .unwrap(), + "05" ) } #[test] fn grid_mover_1() { - let program = String::from( - "-<<<<<<<<<<<<^^^^^^^^^^^^-<^++++++++[->>vv+[->v+]->v++++++<^<^+[-<^+]-<^]>>vv+[->v+]->v...", - ); - let input = String::from(""); - let desired_output = String::from("000"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "-<<<<<<<<<<<<^^^^^^^^^^^^-<^++++++++[->>vv+[->v+]->v++++++<^<^+[-<^+]-<^]>>vv+\ 
+[->v+]->v...", + "", + None + ) + .unwrap(), + "000", ) } #[test] fn grid_bfception_1() { - // run a hello world program within a 1d brainfuck interpreter implemented in 2d brainfuck - let program = String::from("-v>,[>,]^-<+[-<+]->+[-v------------------------------------------^>+]-<+[-<+]->+[-v[-^+^+vv]^[-v+^]^->+<[>-<->+<[>-<->+<[>-<->+<[>-<-------------->+<[>-<-->+<[>-<----------------------------->+<[>-<-->+<[>-[[-]<[-]vv[-]++++++v++^^^>]<[-]]>[[-]<[-]vv[-]+++++v+^^^>]<[-]]>[[-]<[-]vv[-]+++^^>]<[-]]>[[-]<[-]vv[-]++++^^>]<[-]]>[[-]<[-]vv[-]+++++++^^>]<[-]]>[[-]<[-]vv[-]++^^>]<[-]]>[[-]<[-]vv[-]++++++++^^>]<[-]]>[[-]<[-]vv[-]+^^>]+]-v-v-v-v-^^^^<+[-<+]<->v-v-v<-v->^^^^>vvv+^^^<+>+[-<->+v[-^^+^+vvv]^^[-vv+^^]^>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<[-]]>[--[+>-]+v,^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v.^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v[[-]^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]<+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+vvv]^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v>+<[>-<[-]]>[-<^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]>+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+vvv>]<^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+<<-v-^>+v+^[<+v+^>-v-^]+>-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+>>-v-^<+v+^[>+v+^<-v-^]+<-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v-^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v+^+[-<+]-<^^^+[->+]->-[+>-]+^^>]+]-"); - let input = String::from("++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++.\n"); - let desired_output = String::from("Hello World!\n"); + // hello world run inside a brainfuck interpreter written in 2d brainfuck assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + 
"-v>,[>,]^-<+[-<+]->+[-v------------------------------------------^>+]-<+[-<+]\ +->+[-v[-^+^+vv]^[-v+^]^->+<[>-<->+<[>-<->+<[>-<->+<[>-<-------------->+<[>-<-->\ ++<[>-<----------------------------->+<[>-<-->+<[>-[[-]<[-]vv[-]+++\ ++++v++^^^>]<[-]]>[[-]<[-]vv[-]+++++v+^^^>]<[-]]>[[-]<[-]vv[-]+++^^>]<[-]]>[[-]<\ +[-]vv[-]++++^^>]<[-]]>[[-]<[-]vv[-]+++++++^^>]<[-]]>[[-]<[-]vv[-]++^^>]<[-]]>[[\ +-]<[-]vv[-]++++++++^^>]<[-]]>[[-]<[-]vv[-]+^^>]+]-v-v-v-v-^^^^<+[-<+]<->v-\ +v-v<-v->^^^^>vvv+^^^<+>+[-<->+v[-^^+^+vvv]^^[-vv+^^]^>+<-[>[-]<>+<-[>[-]<>+<-[>\ +[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<[-]]>[--[+>\ +-]+v,^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v.^+[-<+]-<^^^+[-\ +>+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v[[-]^^^+[-<+]-^^^+[\ +->+]-<+[>>-[+>-]<+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]\ ++vvv]^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+\ +v]v>+<[>-<[-]]>[-<^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]>+vv[-^^^+^+vvvv]^^^[-vvv+^^^]\ +^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]\ +vvv+[-<+]->-[+>-]+vvv>]<^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[-<\ +vvvvv+[-<+]->-[+>-]+<<-v-^>+v+^[<+v+^>-v-^]+>-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>\ +[--[+>-]+>>-v-^<+v+^[>+v+^<-v-^]+<-+[-<+]-<^^^+[->+]->-[+>-]+^^>]\ +<]>[--[+>-]+v-^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--\ +[+>-]+v+^+[-<+]-<^^^+[->+]->-[+>-]+^^>]+]-", + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.\ ++++.------.--------.>>+.>++.\n", + None + ) + .unwrap(), + "Hello World!\n" ) } #[test] fn grid_bfception_2() { - // random mess - let program = 
String::from("-v>,[>,]^-<+[-<+]->+[-v------------------------------------------^>+]-<+[-<+]->+[-v[-^+^+vv]^[-v+^]^->+<[>-<->+<[>-<->+<[>-<->+<[>-<-------------->+<[>-<-->+<[>-<----------------------------->+<[>-<-->+<[>-[[-]<[-]vv[-]++++++v++^^^>]<[-]]>[[-]<[-]vv[-]+++++v+^^^>]<[-]]>[[-]<[-]vv[-]+++^^>]<[-]]>[[-]<[-]vv[-]++++^^>]<[-]]>[[-]<[-]vv[-]+++++++^^>]<[-]]>[[-]<[-]vv[-]++^^>]<[-]]>[[-]<[-]vv[-]++++++++^^>]<[-]]>[[-]<[-]vv[-]+^^>]+]-v-v-v-v-^^^^<+[-<+]<->v-v-v<-v->^^^^>vvv+^^^<+>+[-<->+v[-^^+^+vvv]^^[-vv+^^]^>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<[-]]>[--[+>-]+v,^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v.^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v[[-]^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]<+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+vvv]^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v>+<[>-<[-]]>[-<^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]>+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+vvv>]<^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+<<-v-^>+v+^[<+v+^>-v-^]+>-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+>>-v-^<+v+^[>+v+^<-v-^]+<-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v-^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v+^+[-<+]-<^^^+[->+]->-[+>-]+^^>]+]-"); - let input = String::from("+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++.\n"); - let desired_output = String::from("eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfijgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n"); + // random mess test from https://code.golf run in brainfuck interpreter written in 2d brainfuck assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "-v>,[>,]^-<+[-<+]->+[-v------------------------------------------^>+]-<+[-<+]-\ 
+>+[-v[-^+^+vv]^[-v+^]^->+<[>-<->+<[>-<->+<[>-<->+<[>-<-------------->+<[>-<-->+\ +<[>-<----------------------------->+<[>-<-->+<[>-[[-]<[-]vv[-]++++\ +++v++^^^>]<[-]]>[[-]<[-]vv[-]+++++v+^^^>]<[-]]>[[-]<[-]vv[-]+++^^>]<[-]]>[[-]<[\ +-]vv[-]++++^^>]<[-]]>[[-]<[-]vv[-]+++++++^^>]<[-]]>[[-]<[-]vv[-]++^^>]<[-]]>[[-\ +]<[-]vv[-]++++++++^^>]<[-]]>[[-]<[-]vv[-]+^^>]+]-v-v-v-v-^^^^<+[-<+]<->v-v\ +-v<-v->^^^^>vvv+^^^<+>+[-<->+v[-^^+^+vvv]^^[-vv+^^]^>+<-[>[-]<>+<-[>[-]<>+<-[>[\ +-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<[-]]>[--[+>-\ +]+v,^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v.^+[-<+]-<^^^+[->\ ++]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v[[-]^^^+[-<+]-^^^+[-\ +>+]-<+[>>-[+>-]<+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+\ +vvv]^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v\ +]v>+<[>-<[-]]>[-<^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]>+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^\ +->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]<\ +vv+[-<+]-<][-]>vvv+[-<+]->-[+>-]+vvv>]<^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+<<-v-^>+v+^[<+v+^>-v-^]+>-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[\ +--[+>-]+>>-v-^<+v+^[>+v+^<-v-^]+<-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<\ +]>[--[+>-]+v-^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[\ ++>-]+v+^+[-<+]-<^^^+[->+]->-[+>-]+^^>]+]-", + "+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+\ +.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++.\n", + None + ) + .unwrap(), + "eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfi\ +jgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n" ) } #[test] fn test_bf2d_code() { - let program = String::from( - ",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.", - ); - let input = String::from(""); - let desired_output = String::from("\0Hello, World!"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + 
",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.", + "", + None + ) + .unwrap(), + "\0Hello, World!" ) } } diff --git a/compiler/src/brainfuck_optimiser.rs b/compiler/src/brainfuck_optimiser.rs index 0072896..9b5d153 100644 --- a/compiler/src/brainfuck_optimiser.rs +++ b/compiler/src/brainfuck_optimiser.rs @@ -1,496 +1,532 @@ -use crate::builder::Opcode; use itertools::Itertools; use std::{collections::HashMap, num::Wrapping}; -// post-compilation optimisations - -// simple naive brainfuck optimisations -// TODO: factor in [-] into optimisations (doing) - -pub fn optimise(program: Vec, exhaustive: bool) -> Vec { - let mut output = Vec::new(); - - // get stretch of characters to optimise (+-<>) - let mut i = 0; - let mut subset = Vec::new(); - while i < program.len() { - let op = program[i]; - match op { - Opcode::Add - | Opcode::Subtract - | Opcode::Right - | Opcode::Left - | Opcode::Clear - | Opcode::Up - | Opcode::Down => { - subset.push(op); - } - Opcode::OpenLoop | Opcode::CloseLoop | Opcode::Input | Opcode::Output => { - // optimise subset and push - let optimised_subset = optimise_subset(subset, exhaustive); - output.extend(optimised_subset); +use crate::{ + backend::bf2d::{Opcode2D, TapeCell2D}, + misc::MastermindContext, +}; + +// originally trivial post-compilation brainfuck optimisations +// extended to 2D which makes it more difficult +impl MastermindContext { + pub fn optimise_bf_code(&self, program: Vec) -> Vec { + let mut output = Vec::new(); + + // get stretch of characters to optimise (+-<>) + let mut i = 0; + let mut subset = Vec::new(); + while i < program.len() { + let op = program[i]; + match op { + Opcode2D::Add + | Opcode2D::Subtract + | Opcode2D::Right + | Opcode2D::Left + | Opcode2D::Clear + | Opcode2D::Up + | Opcode2D::Down => { + subset.push(op); + } + Opcode2D::OpenLoop | Opcode2D::CloseLoop | Opcode2D::Input | Opcode2D::Output => { + // optimise subset and push + let optimised_subset = 
self.optimise_subset(subset); + output.extend(optimised_subset); - subset = Vec::new(); - output.push(op); + subset = Vec::new(); + output.push(op); + } } + i += 1; } - i += 1; - } - - output -} -fn move_position( - mut program: Vec, - old_position: &(i32, i32), - new_position: &(i32, i32), -) -> Vec { - if old_position != new_position { - if old_position.0 < new_position.0 { - for _ in 0..(new_position.0 - old_position.0) { - program.push(Opcode::Right); - } - } else { - for _ in 0..(old_position.0 - new_position.0) { - program.push(Opcode::Left); - } - } - if old_position.1 < new_position.1 { - for _ in 0..(new_position.1 - old_position.1) { - program.push(Opcode::Up); - } - } else { - for _ in 0..(old_position.1 - new_position.1) { - program.push(Opcode::Down); - } - } + output } - program -} -fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { - #[derive(Clone)] - enum Change { - Add(Wrapping), - Set(Wrapping), - } - let mut tape: HashMap<(i32, i32), Change> = HashMap::new(); - let start = (0, 0); - let mut head = (0, 0); - let mut i = 0; - //Generate a map of cells we change and how we plan to change them - while i < run.len() { - let op = run[i]; - match op { - Opcode::Clear => { - tape.insert(head, Change::Set(Wrapping(0i8))); - } - Opcode::Subtract | Opcode::Add => { - let mut change = tape.remove(&head).unwrap_or(Change::Add(Wrapping(0i8))); - - let (Change::Add(val) | Change::Set(val)) = &mut change; - *val += match op { - Opcode::Add => 1, - Opcode::Subtract => -1, - _ => 0, - }; - - match &change { - Change::Add(val) => { - if *val != Wrapping(0i8) { + fn optimise_subset(&self, run: Vec) -> Vec { + #[derive(Clone)] + enum Change { + Add(Wrapping), + Set(Wrapping), + } + let mut tape: HashMap = HashMap::new(); + let start = TapeCell2D(0, 0); + let mut head = TapeCell2D(0, 0); + let mut i = 0; + // simulate the subprogram to find the exact changes made to the tape + while i < run.len() { + let op = run[i]; + match op { + Opcode2D::Clear => { + 
tape.insert(head, Change::Set(Wrapping(0i8))); + } + Opcode2D::Subtract | Opcode2D::Add => { + let mut change = tape.remove(&head).unwrap_or(Change::Add(Wrapping(0i8))); + + let (Change::Add(val) | Change::Set(val)) = &mut change; + *val += match op { + Opcode2D::Add => 1, + Opcode2D::Subtract => -1, + _ => 0, + }; + + match &change { + Change::Add(val) => { + if *val != Wrapping(0i8) { + tape.insert(head, change); + } + } + Change::Set(_) => { tape.insert(head, change); } } - Change::Set(_) => { - tape.insert(head, change); - } } - } - Opcode::Right => { - head.0 += 1; - } - Opcode::Left => { - head.0 -= 1; - } - Opcode::Up => { - head.1 += 1; - } - Opcode::Down => { - head.1 -= 1; - } - _ => (), - } - i += 1; - } - let mut output = Vec::new(); - if exhaustive { - //Exhaustive approach checks all permutations - let mut output_length = i32::MAX; - let mut best_permutation = Vec::new(); - for perm in tape.iter().permutations(tape.len()) { - let mut position = start; - let mut current_output_length = 0; - //Calculate the distance of this - for (cell, _) in &perm { - current_output_length += (cell.0 - position.0).abs(); - current_output_length += (cell.1 - position.1).abs(); - position = **cell; - if current_output_length > output_length { - break; + Opcode2D::Right => { + head.0 += 1; } + Opcode2D::Left => { + head.0 -= 1; + } + Opcode2D::Up => { + head.1 += 1; + } + Opcode2D::Down => { + head.1 -= 1; + } + _ => (), } - if current_output_length > output_length { - continue; - } - //Add the distance to the finishing location - current_output_length += (head.0 - position.0).abs(); - current_output_length += (head.1 - position.1).abs(); - if current_output_length < output_length { - best_permutation = perm; - output_length = current_output_length; - } - } - let mut position = start; - for (cell, change) in best_permutation { - output = move_position(output, &position, cell); - position = *cell; - if let Change::Set(_) = change { - output.push(Opcode::Clear); - } - let 
(Change::Add(v) | Change::Set(v)) = change; - let v = v.0; - for _ in 0..(v as i32).abs() { - output.push(match v == -128 || v > 0 { - true => Opcode::Add, - false => Opcode::Subtract, - }); - } + i += 1; } - output = move_position(output, &position, &head); - } else { - //Greedy approach faster for bigger datasets - let mut position = start; - //For the number of cells navigate to the nearest cell - for _ in 0..tape.len() { - if !tape.is_empty() { - let mut min_distance = i32::MAX; - let mut next_position = (0, 0); - for (cell, _value) in tape.iter() { - if (cell.0 - position.0).abs() + (cell.1 - position.1).abs() < min_distance { - min_distance = (cell.0 - position.0).abs() + (cell.1 - position.1).abs(); - next_position = *cell; + let mut output = Vec::new(); + if self.config.optimise_generated_all_permutations { + //Exhaustive approach checks all permutations + let mut output_length = i32::MAX; + let mut best_permutation = Vec::new(); + for perm in tape.iter().permutations(tape.len()) { + let mut position = start; + let mut current_output_length = 0; + //Calculate the distance of this + for (cell, _) in &perm { + current_output_length += (cell.0 - position.0).abs(); + current_output_length += (cell.1 - position.1).abs(); + position = **cell; + if current_output_length > output_length { + break; } } - // Move to next position - output = move_position(output, &position, &next_position); - position = next_position; - //Now Update the output with correct opcodes - let change = tape.remove(&next_position).unwrap(); + if current_output_length > output_length { + continue; + } + //Add the distance to the finishing location + current_output_length += (head.0 - position.0).abs(); + current_output_length += (head.1 - position.1).abs(); + if current_output_length < output_length { + best_permutation = perm; + output_length = current_output_length; + } + } + let mut position = start; + for (cell, change) in best_permutation { + output = _move_position(output, &position, 
cell); + position = *cell; if let Change::Set(_) = change { - output.push(Opcode::Clear); + output.push(Opcode2D::Clear); } let (Change::Add(v) | Change::Set(v)) = change; let v = v.0; for _ in 0..(v as i32).abs() { output.push(match v == -128 || v > 0 { - true => Opcode::Add, - false => Opcode::Subtract, + true => Opcode2D::Add, + false => Opcode2D::Subtract, }); } } + output = _move_position(output, &position, &head); + } else { + //Greedy approach faster for bigger datasets + let mut position = start; + //For the number of cells navigate to the nearest cell + for _ in 0..tape.len() { + if !tape.is_empty() { + let mut min_distance = i32::MAX; + let mut next_position = TapeCell2D(0, 0); + for (cell, _value) in tape.iter() { + if (cell.0 - position.0).abs() + (cell.1 - position.1).abs() < min_distance + { + min_distance = + (cell.0 - position.0).abs() + (cell.1 - position.1).abs(); + next_position = *cell; + } + } + // Move to next position + output = _move_position(output, &position, &next_position); + position = next_position; + //Now Update the output with correct opcodes + let change = tape.remove(&next_position).unwrap(); + if let Change::Set(_) = change { + output.push(Opcode2D::Clear); + } + let (Change::Add(v) | Change::Set(v)) = change; + let v = v.0; + for _ in 0..(v as i32).abs() { + output.push(match v == -128 || v > 0 { + true => Opcode2D::Add, + false => Opcode2D::Subtract, + }); + } + } + } + output = _move_position(output, &position, &head); } - output = move_position(output, &position, &head); + output } - output } -#[cfg(test)] -mod tests { - use crate::builder::BrainfuckOpcodes; +fn _move_position( + mut program: Vec, + old_position: &TapeCell2D, + new_position: &TapeCell2D, +) -> Vec { + if old_position != new_position { + if old_position.0 < new_position.0 { + for _ in 0..(new_position.0 - old_position.0) { + program.push(Opcode2D::Right); + } + } else { + for _ in 0..(old_position.0 - new_position.0) { + program.push(Opcode2D::Left); + } + } + 
if old_position.1 < new_position.1 { + for _ in 0..(new_position.1 - old_position.1) { + program.push(Opcode2D::Up); + } + } else { + for _ in 0..(old_position.1 - new_position.1) { + program.push(Opcode2D::Down); + } + } + } + program +} - use super::*; +#[cfg(test)] +mod bf_optimiser_tests { + use crate::{ + backend::common::BrainfuckProgram, + misc::{MastermindConfig, MastermindContext}, + }; + + const CTX_OPT: MastermindContext = MastermindContext { + config: MastermindConfig { + optimise_generated_code: true, + optimise_generated_all_permutations: false, + optimise_cell_clearing: false, + optimise_unreachable_loops: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, + optimise_constants: false, + optimise_empty_blocks: false, + memory_allocation_method: 0, + enable_2d_grid: false, + }, + }; + + const CTX_OPT_EXHAUSTIVE: MastermindContext = MastermindContext { + config: MastermindConfig { + optimise_generated_code: true, + optimise_generated_all_permutations: true, + optimise_cell_clearing: false, + optimise_unreachable_loops: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, + optimise_constants: false, + optimise_empty_blocks: false, + memory_allocation_method: 0, + enable_2d_grid: false, + }, + }; #[test] fn greedy_subset_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let v = BrainfuckProgram::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "+++++>--->+++<<<<<+++"); } #[test] fn greedy_program_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("<><><>++<+[--++>>+<<-]"); - let o: String = optimise(v, false).to_string(); + let v = BrainfuckProgram::from_str("<><><>++<+[--++>>+<<-]"); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "++<+[->>+<<]"); } #[test] fn 
greedy_program_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "+++++++++>>+++++++>---->>>++<<<<[>++<]"); } #[test] fn greedy_program_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str(">><."); - let o: String = optimise(v, false).to_string(); + let v = BrainfuckProgram::from_str(">><."); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, ">."); } #[test] fn greedy_subset_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let v = BrainfuckProgram::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++<+++>"); } #[test] fn greedy_subset_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let v = BrainfuckProgram::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]-<[-]->"); } #[test] fn greedy_program_equivalence_test_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++[-]+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "[-]+++++++++>>+++++++>---->>>++<<<<[[-]+>++<]"); } #[test] fn greedy_two_dimensional_subset_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let v = 
BrainfuckProgram::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "+++++^---^+++vvvvv+++"); } #[test] fn greedy_two_dimensional_program_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("v^v^v^++v+[--++^^+vv-]"); - let o: String = optimise(v, false).to_string(); + let v = BrainfuckProgram::from_str("v^v^v^++v+[--++^^+vv-]"); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "++v+[-^^+vv]"); } #[test] fn greedy_two_dimensional_program_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++++++^^+++^----^^^++++--v--++vvhellov++++[-v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "+++++++++^^+++++++^----^^^++vvvv[^++v]"); } #[test] fn greedy_two_dimensional_program_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("^^v."); - let o: String = optimise(v, false).to_string(); + let v = BrainfuckProgram::from_str("^^v."); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "^."); } #[test] fn greedy_two_dimensional_subset_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let v = BrainfuckProgram::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++v+++^"); } #[test] fn greedy_two_dimensional_subset_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let v = BrainfuckProgram::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]-v[-]-^"); } #[test] fn greedy_two_dimensional_program_equivalence_test_3() 
{ - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++[-]+++++++++^^+++^----^^^++++--v--++vvhellov++++[[-]v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "[-]+++++++++^^+++++++^----^^^++vvvv[[-]+^++v]"); } #[test] #[ignore] fn exhaustive_subset_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let v = BrainfuckProgram::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, ">--->+++<<+++++<<<+++"); } #[test] #[ignore] fn exhaustive_program_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("<><><>++<+[--++>>+<<-]"); - let o: String = optimise(v, true).to_string(); + let v = BrainfuckProgram::from_str("<><><>++<+[--++>>+<<-]"); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "++<+[>>+<<-]"); } #[test] #[ignore] fn exhaustive_program_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "+++++++++>>+++++++>>>>++<<<----<[>++<]"); } #[test] #[ignore] fn exhaustive_program_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str(">><."); - let o: String = optimise(v, true).to_string(); + let v = BrainfuckProgram::from_str(">><."); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, ">."); } #[test] #[ignore] fn exhaustive_subset_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, 
true).to_string(); + let v = BrainfuckProgram::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++<+++>"); } #[test] #[ignore] fn exhaustive_subset_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let v = BrainfuckProgram::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]-<[-]->"); } #[test] #[ignore] fn exhaustive_program_equivalence_test_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++[-]+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "[-]+++++++++>>+++++++>---->>>++<<<<[[-]+>++<]"); } #[test] #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let v = BrainfuckProgram::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "^^+++v---v+++++vvv+++"); } #[test] #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("v^v^v^++v+[--++^^+vv-]"); - let o: String = optimise(v, true).to_string(); + let v = BrainfuckProgram::from_str("v^v^v^++v+[--++^^+vv-]"); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "++v+[^^+vv-]"); } #[test] #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++++++^^+++^----^^^++++--v--++vvhellov++++[-v+^^++v+v-^]++---^+", ); // 
[9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "+++++++++^^+++++++^----^^^++vvvv[^++v]"); } #[test] #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("^^v."); - let o: String = optimise(v, true).to_string(); + let v = BrainfuckProgram::from_str("^^v."); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "^."); } #[test] #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let v = BrainfuckProgram::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++v+++^"); } #[test] #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let v = BrainfuckProgram::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]-v[-]-^"); } #[test] #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++[-]+++++++++^^+++^----^^^++++--v--++vvhellov++++[[-]v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "[-]+++++++++^^^^^^++vvv----v+++++++[^++v[-]+]"); } fn subset_edge_case_0() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "-++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++", ); - let 
o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 127); } #[test] fn subset_edge_case_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++", ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 128); } #[test] fn subset_edge_case_2() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+--------------------------------------------------------------------------------------------------------------------------------" ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 127); } #[test] fn subset_edge_case_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "--------------------------------------------------------------------------------------------------------------------------------" ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 128); } #[test] fn subset_edge_case_3a() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "- --------------------------------------------------------------------------------------------------------------------------------" ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 127); } #[test] fn subset_edge_case_4() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( 
"[-]--------------------------------------------------------------------------------------------------------------------------------" ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 131); } diff --git a/compiler/src/builder.rs b/compiler/src/builder.rs deleted file mode 100644 index bf35cc4..0000000 --- a/compiler/src/builder.rs +++ /dev/null @@ -1,736 +0,0 @@ -// turns low-level bf instructions into plain bf -// take in a timeline of cell allocations and move-to-cell operations, etc -// output plain bf according to that spec - -// this algorithm is responsible for actually allocating physical tape cells as opposed to the parser -// can introduce optimisations here with some kind of allocation timeline sorting algorithm (hard leetcode style problem) - -use std::{ - collections::{HashMap, HashSet}, - num::Wrapping, -}; - -use crate::{ - compiler::{CellLocation, Instruction, MemoryId}, - constants_optimiser::calculate_optimal_addition, - macros::macros::{r_assert, r_panic}, - MastermindConfig, -}; - -pub struct Builder<'a> { - pub config: &'a MastermindConfig, -} - -type LoopDepth = usize; -pub type TapeCell = (i32, i32); -type TapeValue = u8; - -impl Builder<'_> { - pub fn build( - &self, - instructions: Vec, - return_to_origin: bool, - ) -> Result, String> { - let mut allocator = CellAllocator::new(); - let mut alloc_map: HashMap>)> = - HashMap::new(); - - let mut loop_stack: Vec = Vec::new(); - let mut current_loop_depth: LoopDepth = 0; - let mut skipped_loop_depth: Option = None; - let mut ops = BrainfuckCodeBuilder::new(); - - for instruction in instructions { - if let Some(depth) = skipped_loop_depth { - // current loop is being skipped because of unreachable loop optimisations - match instruction { - Instruction::OpenLoop(_) => { - current_loop_depth += 1; - } - Instruction::CloseLoop(_) => { - current_loop_depth -= 1; - if current_loop_depth == depth { - 
skipped_loop_depth = None; - } - } - _ => (), - } - continue; - } - match instruction { - // the ids (indices really) given by the compiler are guaranteed to be unique (at the time of writing) - // however they will absolutely not be very efficient if used directly as cell locations - Instruction::Allocate(memory, location_specifier) => { - let cell = allocator.allocate( - location_specifier, - memory.len(), - self.config.memory_allocation_method, - )?; - let None = alloc_map.insert( - memory.id(), - ( - cell, - memory.len(), - current_loop_depth, - vec![Some(0); memory.len()], - ), - ) else { - r_panic!("Attempted to reallocate memory {memory:#?}"); - }; - } - Instruction::AssertCellValue(cell_obj, imm) => { - let Some((_cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!( - "Attempted to assert value of cell {cell_obj:#?} \ -which could not be found" - ); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let known_value = &mut known_values[mem_idx]; - - // allow the user to assert that we don't know the value of the cell by clobbering when we do inline brainfuck - if *alloc_loop_depth == current_loop_depth || imm.is_none() { - *known_value = imm; - } else { - r_panic!( - "Cannot assert cell {cell_obj:#?} value \ -outside of loop it was allocated" - ); - } - } - Instruction::Free(id) => { - // TODO: do I need to check alloc loop depth here? Or are cells never freed in an inner scope? 
- // think about this in regards to reusing cell space when a cell isn't being used - let Some((cell, size, _alloc_loop_depth, known_values)) = alloc_map.remove(&id) - else { - r_panic!("Attempted to free memory id {id} which could not be found"); - }; - - let None = known_values - .into_iter() - .find_map(|known_value| (known_value.unwrap_or(1) != 0).then_some(())) - else { - r_panic!( - "Attempted to free memory id {id} which has unknown or non-zero values" - ); - }; - - allocator.free(cell, size)?; - } - Instruction::OpenLoop(cell_obj) => { - let Some((cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!( - "Attempted to open loop at cell {cell_obj:#?} which could not be found" - ); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - let mut open = true; - - if let Some(known_value) = known_value { - if *alloc_loop_depth == current_loop_depth - && *known_value == 0 && self.config.optimise_unreachable_loops - { - open = false; - skipped_loop_depth = Some(current_loop_depth); - current_loop_depth += 1; - } - } - - // skip the loop if the optimisations are turned on and we know the value is 0 - if open { - ops.move_to_cell(cell); - ops.push(Opcode::OpenLoop); - loop_stack.push(cell); - current_loop_depth += 1; - } - } - Instruction::CloseLoop(cell_obj) => { - let Some((cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!( - "Attempted to close loop at cell {cell_obj:#?} which could not be found" - ); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - let 
Some(stack_cell) = loop_stack.pop() else { - r_panic!("Attempted to close un-opened loop"); - }; - r_assert!(cell == stack_cell, "Attempted to close a loop unbalanced"); - - current_loop_depth -= 1; - - ops.move_to_cell(cell); - ops.push(Opcode::CloseLoop); - - // if a loop finishes on a cell then it is guaranteed to be 0 based on brainfuck itself - // I did encounter issues with nested loops here, interesting - if current_loop_depth == *alloc_loop_depth { - *known_value = Some(0); - } - } - Instruction::AddToCell(cell_obj, imm) => { - let Some((cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!("Attempted to add to cell {cell_obj:#?} which could not be found"); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - // TODO: fix bug, if only one multiplication then we can have a value already in the cell, but never otherwise - - // not sure if these optimisations should be in the builder step or in the compiler - if self.config.optimise_constants { - // ops.move_to_cell(&mut head_pos, cell); - // TODO: algorithm that finds the best combo of products and constants to make the number to minimise bf code - // first we get the closest allocated cell so we can calculate the distance cost of multiplying - // TODO: instead find the nearest zero cell, doesn't matter if allocated or not - let temp_cell = allocator.allocate_temp_cell(cell); - - let optimised_ops: BrainfuckCodeBuilder = - calculate_optimal_addition(imm as i8, ops.head_pos, cell, temp_cell); - - ops.head_pos = optimised_ops.head_pos; - ops.extend(optimised_ops.opcodes); - - allocator.free(temp_cell, 1)?; - } else { - ops.move_to_cell(cell); - ops.add_to_current_cell(imm as i8); - } - - if imm != 0 { - if *alloc_loop_depth != current_loop_depth { - 
*known_value = None; - } else if let Some(known_value) = known_value { - *known_value = (Wrapping(*known_value) + Wrapping(imm)).0; - } - } - } - Instruction::InputToCell(cell_obj) => { - let Some((cell_base, size, _, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!( - "Attempted to input to cell {cell_obj:#?} which could not be found" - ); - }; - - // TODO: refactor this duplicate code (get_cell_safe or something like that) - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - ops.move_to_cell(cell); - ops.push(Opcode::Input); - // no way to know at compile time what the input to the program will be - *known_value = None; - } - // Instruction::AssertCellValue(id, value) => {} - Instruction::ClearCell(cell_obj) => { - let Some((cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!("Attempted to clear cell {cell_obj:#?} which could not be found"); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - ops.move_to_cell(cell); - - let mut clear = true; - - if let Some(known_value) = known_value { - if self.config.optimise_cell_clearing - && *alloc_loop_depth == current_loop_depth - && (*known_value as i8).abs() < 4 - // not sure if this should be 4 or 3, essentially it depends on if we prefer clears or changes [-] vs ++--- - { - let imm = *known_value as i8; - if imm > 0 { - for _ in 0..imm { - ops.push(Opcode::Subtract); - } - } else if imm < 0 { - for _ in 0..-imm { - ops.push(Opcode::Add); - } - } - clear = false; - } - } - - if clear { - ops.push(Opcode::Clear); - } - - if *alloc_loop_depth == 
current_loop_depth { - *known_value = Some(0); - } else { - // TODO: fix this for if statements - *known_value = None; - } - } - Instruction::OutputCell(cell_obj) => { - let Some((cell_base, size, _, _)) = alloc_map.get(&cell_obj.memory_id) else { - r_panic!("Attempted to output cell {cell_obj:#?} which could not be found"); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); - - ops.move_to_cell(cell); - ops.push(Opcode::Output); - } - Instruction::InsertBrainfuckAtCell(operations, location_specifier) => { - // move to the correct cell, based on the location specifier - match location_specifier { - CellLocation::FixedCell(cell) => ops.move_to_cell(cell), - CellLocation::MemoryCell(cell_obj) => { - let Some((cell_base, size, _alloc_loop_depth, _known_values)) = - alloc_map.get(&cell_obj.memory_id) - else { - r_panic!("Attempted to use location of cell {cell_obj:#?} which could not be found"); - }; - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); - ops.move_to_cell(cell); - } - CellLocation::Unspecified => (), - } - - // paste the in-line BF operations - ops.extend(operations); - } - } - } - - // this is used in embedded brainfuck contexts to preserve head position - if return_to_origin { - ops.move_to_cell((0, 0)); - } - - Ok(ops.opcodes) - } -} - -struct CellAllocator { - alloc_map: HashSet, -} - -// allocator will not automatically allocate negative-index cells -// but users can -impl CellAllocator { - fn new() -> CellAllocator { - CellAllocator { - alloc_map: HashSet::new(), - } - } - - /// Checks if the memory size can be allocated to the right of a given location e.g. 
arrays - fn check_allocatable(&mut self, location: &TapeCell, size: usize) -> bool { - for k in 0..size { - if self - .alloc_map - .contains(&(location.0 + k as i32, location.1)) - { - return false; - } - } - return true; - } - - /// Will either check a specific location can be allocated at the chosen size or if no location is - /// provided it will find a memory location where this size can be allocated - /// Uses a variety of memory allocation methods based on settings - fn allocate( - &mut self, - location: Option, - size: usize, - method: u8, - ) -> Result { - let mut region_start = location.unwrap_or((0, 0)); - //Check specified memory allocation above to ensure that this works nicely with all algorithms - if let Some(l) = location { - if !self.check_allocatable(&l, size) { - r_panic!( - "Location specifier @{0},{1} conflicts with another allocation", - l.0, - l.1 - ); - } - } else { - // should the region start at the current tape head? - if method == 0 { - for i in region_start.0.. 
{ - if self.alloc_map.contains(&(i, region_start.1)) { - region_start = (i + 1, region_start.1); - } else if i - region_start.0 == (size as i32 - 1) { - break; - } - } - } else if method == 1 { - //Zig Zag - let mut found = false; - let mut loops = 0; - let mut i; - let mut j; - while !found { - i = region_start.0 + loops; - j = region_start.1; - for _ in 0..=loops { - if self.check_allocatable(&(i, j), size) { - found = true; - region_start = (i, j); - break; - } - i = i - 1; - j = j + 1; - } - loops += 1; - } - } else if method == 2 { - //Spiral - let mut found = false; - let mut loops = 1; - let directions = ['N', 'E', 'S', 'W']; - let mut i = region_start.0; - let mut j = region_start.1; - while !found { - for dir in directions { - match dir { - 'N' => { - for _ in 0..loops { - j += 1; - if self.check_allocatable(&(i, j), size) { - found = true; - region_start = (i, j); - break; - } - } - } - 'E' => { - for _ in 0..loops { - i += 1; - if self.check_allocatable(&(i, j), size) { - found = true; - region_start = (i, j); - break; - } - } - } - 'S' => { - for _ in 0..loops { - j -= 1; - if self.check_allocatable(&(i, j), size) { - found = true; - region_start = (i, j); - break; - } - } - } - 'W' => { - for _ in 0..loops { - i -= 1; - if self.check_allocatable(&(i, j), size) { - found = true; - region_start = (i, j); - break; - } - } - } - _ => {} - } - if found { - break; - } - } - if found { - break; - } - i -= 1; - j -= 1; - loops += 2; - } - } else if method == 3 { - //Tiles - let mut found = false; - let mut loops = 0; - while !found { - for i in -loops..=loops { - for j in -loops..=loops { - if self - .check_allocatable(&(region_start.0 + i, region_start.1 + j), size) - { - found = true; - region_start = (region_start.0 + i, region_start.1 + j); - break; - } - } - if found { - break; - } - } - loops += 1; - } - } else { - panic!("Memory Allocation Method not implemented"); - } - } - - // make all cells in the specified region allocated - for i in 
region_start.0..(region_start.0 + size as i32) { - if !self.alloc_map.contains(&(i, region_start.1)) { - self.alloc_map.insert((i, region_start.1)); - } - } - - Ok(region_start) - } - - // allocate but start looking close to the given cell, used for optimising constants as you need an extra cell to multiply - // again not sure if this stuff should be in the builder step or the compiler step ? This seems the simplest for now - // but I'm wary that complex systems often evolve from simple ones, and any optimisations introduce complexity - fn allocate_temp_cell(&mut self, location: TapeCell) -> TapeCell { - // this will allocate the given cell if unallocated so beware - if self.alloc_map.insert(location) { - return location; - } - - // alternate left then right, getting further and further out - // there is surely a nice one liner rusty iterator way of doing it but somehow this is clearer until I learn that - let mut left_iter = (0..location.0).rev(); - let mut right_iter = (location.0 + 1)..; - loop { - if let Some(i) = left_iter.next() { - // unallocated cell, allocate it and return - if self.alloc_map.insert((i, location.1)) { - return (i, location.1); - } else { - } - } - - if let Some(i) = right_iter.next() { - if self.alloc_map.insert((i, location.1)) { - return (i, location.1); - } - } - } - } - - fn free(&mut self, cell: TapeCell, size: usize) -> Result<(), String> { - for i in cell.0..(cell.0 + size as i32) { - r_assert!( - self.alloc_map.contains(&(i, cell.1)), - "Cannot free cell @{0},{1} as it is not allocated.", - i, - cell.1 - ); - self.alloc_map.remove(&(i, cell.1)); - } - - Ok(()) - } -} - -#[derive(Clone, Copy, Debug)] -pub enum Opcode { - Add, - Subtract, - Right, - Left, - OpenLoop, - CloseLoop, - Output, - Input, - Clear, - Up, - Down, -} - -pub struct BrainfuckCodeBuilder { - opcodes: Vec, - pub head_pos: TapeCell, -} - -pub trait BrainfuckOpcodes { - fn to_string(self) -> String; - fn from_str(s: &str) -> Self; -} - -impl BrainfuckOpcodes for Vec 
{ - fn to_string(self) -> String { - let mut s = String::new(); - self.into_iter().for_each(|o| { - s.push_str(match o { - Opcode::Add => "+", - Opcode::Subtract => "-", - Opcode::Right => ">", - Opcode::Left => "<", - Opcode::OpenLoop => "[", - Opcode::CloseLoop => "]", - Opcode::Output => ".", - Opcode::Input => ",", - Opcode::Clear => "[-]", - Opcode::Up => "^", - Opcode::Down => "v", - }) - }); - s - } - - fn from_str(s: &str) -> Self { - let mut ops = Vec::new(); - let mut i = 0; - while i < s.len() { - let substr = &s[i..]; - if substr.starts_with("[-]") { - ops.push(Opcode::Clear); - i += 3; - } else { - match substr.chars().next().unwrap() { - '+' => ops.push(Opcode::Add), - '-' => ops.push(Opcode::Subtract), - '>' => ops.push(Opcode::Right), - '<' => ops.push(Opcode::Left), - '[' => ops.push(Opcode::OpenLoop), - ']' => ops.push(Opcode::CloseLoop), - '.' => ops.push(Opcode::Output), - ',' => ops.push(Opcode::Input), - '^' => ops.push(Opcode::Up), - 'v' => ops.push(Opcode::Down), - _ => (), // could put a little special opcode in for other characters - } - i += 1; - } - } - - ops - } -} - -impl BrainfuckOpcodes for BrainfuckCodeBuilder { - fn to_string(self) -> String { - self.opcodes.to_string() - } - - fn from_str(s: &str) -> Self { - BrainfuckCodeBuilder { - opcodes: BrainfuckOpcodes::from_str(s), - head_pos: (0, 0), - } - } -} - -impl BrainfuckCodeBuilder { - pub fn new() -> BrainfuckCodeBuilder { - BrainfuckCodeBuilder { - opcodes: Vec::new(), - head_pos: (0, 0), - } - } - pub fn len(&self) -> usize { - self.opcodes.len() - } - pub fn push(&mut self, op: Opcode) { - self.opcodes.push(op); - } - pub fn extend(&mut self, ops: T) - where - T: IntoIterator, - { - self.opcodes.extend(ops); - } - pub fn move_to_cell(&mut self, cell: TapeCell) { - let x = cell.0; - let y = cell.1; - let x_pos = self.head_pos.0; - let y_pos = self.head_pos.1; - //Move x level - if x_pos < x { - for _ in x_pos..x { - self.opcodes.push(Opcode::Right); - } - } else if x < x_pos { 
- // theoretically equivalent to cell..head_pos? - for _ in ((x + 1)..=x_pos).rev() { - self.opcodes.push(Opcode::Left); - } - } - //Move y level - if y_pos < y { - for _ in y_pos..y { - self.opcodes.push(Opcode::Up); - } - } else if y < y_pos { - // theoretically equivalent to cell..head_pos? - for _ in ((y + 1)..=y_pos).rev() { - self.opcodes.push(Opcode::Down); - } - } - self.head_pos = cell; - } - - pub fn add_to_current_cell(&mut self, imm: i8) { - if imm > 0 { - for _ in 0..imm { - self.opcodes.push(Opcode::Add); - } - } else if imm < 0 { - // needs to be i32 because -(-128) = -128 in i8-land - for _ in 0..-(imm as i32) { - self.opcodes.push(Opcode::Subtract); - } - } - } -} diff --git a/compiler/src/compiler.rs b/compiler/src/frontend/frontend.rs similarity index 63% rename from compiler/src/compiler.rs rename to compiler/src/frontend/frontend.rs index 2ed69f3..e312a9b 100644 --- a/compiler/src/compiler.rs +++ b/compiler/src/frontend/frontend.rs @@ -1,49 +1,58 @@ // compile syntax tree into low-level instructions -use std::{collections::HashMap, iter::zip}; +use std::{collections::HashMap, fmt::Display, iter::zip}; +use super::types::*; use crate::{ - builder::{Builder, Opcode, TapeCell}, - macros::macros::{r_assert, r_panic}, + backend::common::{ + BrainfuckBuilder, BrainfuckBuilderData, CellAllocator, CellAllocatorData, OpcodeVariant, + TapeCellVariant, + }, + macros::macros::*, + misc::MastermindContext, parser::{ - Clause, Expression, ExtendedOpcode, LocationSpecifier, Reference, VariableDefinition, - VariableTarget, VariableTargetReferenceChain, VariableTypeReference, + expressions::Expression, + types::{ + Clause, ExtendedOpcode, LocationSpecifier, Reference, StructFieldTypeDefinition, + VariableTarget, VariableTargetReferenceChain, VariableTypeDefinition, + VariableTypeReference, + }, }, - MastermindConfig, }; -// memory stuff is all WIP and some comments may be incorrect - -pub struct Compiler<'a> { - pub config: &'a MastermindConfig, -} - -impl 
Compiler<'_> { - pub fn compile<'a>( - &'a self, - clauses: &[Clause], - outer_scope: Option<&'a Scope>, - ) -> Result, String> { +impl MastermindContext { + pub fn create_ir_scope<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( + &self, + clauses: &[Clause], + outer_scope: Option<&'a ScopeBuilder>, + ) -> Result, String> + where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData: CellAllocator, + { let mut scope = if let Some(outer) = outer_scope { outer.open_inner() } else { - Scope::new() + ScopeBuilder::new() }; // TODO: fix unnecessary clones, and reimplement this with iterators somehow // hoist structs, then functions to top - let mut filtered_clauses_1: Vec = vec![]; + let mut filtered_clauses_1 = vec![]; // first stage: structs (these need to be defined before functions, so they can be used as arguments) for clause in clauses { match clause { Clause::DefineStruct { name, fields } => { + // convert fields with 2D or 1D location specifiers to valid struct location specifiers scope.register_struct_definition(name, fields.clone())?; } + // also filter out None clauses (although there shouldn't be any) + Clause::None => (), _ => filtered_clauses_1.push(clause.clone()), } } // second stage: functions - let mut filtered_clauses_2: Vec = vec![]; + let mut filtered_clauses_2 = vec![]; for clause in filtered_clauses_1 { match clause { Clause::DefineFunction { @@ -80,7 +89,7 @@ impl Compiler<'_> { | Expression::VariableReference(_), ) => { let cell = scope.get_cell(&VariableTarget::from_definition(&var))?; - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; } // multi-cell arrays and (array literals or strings) @@ -93,7 +102,7 @@ impl Compiler<'_> { expressions.len() ); for (cell, expr) in zip(cells, expressions) { - _add_expr_to_cell(&mut scope, expr, cell)?; + scope._add_expr_to_cell(expr, cell)?; } } (ValueType::Array(_, _), Expression::StringLiteral(s)) => { @@ -147,7 +156,7 @@ impl Compiler<'_> { } 
} } - Clause::SetVariable { + Clause::Assign { var, value, self_referencing, @@ -155,11 +164,11 @@ impl Compiler<'_> { (false, false) => { let cell = scope.get_cell(&var)?; scope.push_instruction(Instruction::ClearCell(cell.clone())); - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; } (false, true) => { let cell = scope.get_cell(&var)?; - _add_self_referencing_expr_to_cell(&mut scope, value, cell, true)?; + scope._add_self_referencing_expr_to_cell(value, cell, true)?; } (true, _) => { r_panic!("Unsupported operation, assigning to spread variable: {var}"); @@ -168,18 +177,18 @@ impl Compiler<'_> { // etc... } }, - Clause::AddToVariable { + Clause::AddAssign { var, value, self_referencing, } => match (var.is_spread, self_referencing) { (false, false) => { let cell = scope.get_cell(&var)?; - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; } (false, true) => { let cell = scope.get_cell(&var)?; - _add_self_referencing_expr_to_cell(&mut scope, value, cell, false)?; + scope._add_self_referencing_expr_to_cell(value, cell, false)?; } (true, _) => { r_panic!("Unsupported operation, add-assigning to spread variable: {var}"); @@ -219,7 +228,7 @@ impl Compiler<'_> { } } } - Clause::InputVariable { var } => match var.is_spread { + Clause::Input { var } => match var.is_spread { false => { let cell = scope.get_cell(&var)?; scope.push_instruction(Instruction::InputToCell(cell)); @@ -231,7 +240,7 @@ impl Compiler<'_> { } } }, - Clause::OutputValue { value } => { + Clause::Output { value } => { match value { Expression::VariableReference(var) => match var.is_spread { false => { @@ -261,7 +270,7 @@ impl Compiler<'_> { index: None, }; - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; scope.push_instruction(Instruction::OutputCell(cell)); scope.push_instruction(Instruction::ClearCell(cell)); @@ -280,7 +289,7 @@ impl Compiler<'_> { }; for value in expressions { - 
_add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; scope.push_instruction(Instruction::OutputCell(cell)); scope.push_instruction(Instruction::ClearCell(cell)); } @@ -309,7 +318,7 @@ impl Compiler<'_> { } } } - Clause::WhileLoop { var, block } => { + Clause::While { var, block } => { let cell = scope.get_cell(&var)?; // open loop on variable @@ -317,25 +326,25 @@ impl Compiler<'_> { // recursively compile instructions // TODO: when recursively compiling, check which things changed based on a return info value - let loop_scope = self.compile(&block, Some(&scope))?; - scope - .instructions - .extend(loop_scope.finalise_instructions(true)); + let loop_scope = self.create_ir_scope(&block, Some(&scope))?; + scope.instructions.extend(loop_scope.build_ir(true)); // close the loop scope.push_instruction(Instruction::CloseLoop(cell)); } - Clause::CopyLoop { + Clause::DrainLoop { source, targets, block, - is_draining, + is_copying, } => { // TODO: refactor this, there is duplicate code with copying the source value cell - let (source_cell, free_source_cell) = match (is_draining, &source) { + let (source_cell, free_source_cell) = match (is_copying, &source) { // draining loops can drain from an expression or a variable - (true, Expression::VariableReference(var)) => (scope.get_cell(var)?, false), - (true, _) => { + (false, Expression::VariableReference(var)) => { + (scope.get_cell(var)?, false) + } + (false, _) => { // any other kind of expression, allocate memory for it automatically let id = scope.push_memory_id(); scope @@ -344,10 +353,10 @@ impl Compiler<'_> { memory_id: id, index: None, }; - _add_expr_to_cell(&mut scope, &source, new_cell)?; + scope._add_expr_to_cell(&source, new_cell)?; (new_cell, true) } - (false, Expression::VariableReference(var)) => { + (true, Expression::VariableReference(var)) => { let cell = scope.get_cell(var)?; let new_mem_id = scope.push_memory_id(); @@ -361,22 +370,22 @@ impl Compiler<'_> { index: None, }; 
- _copy_cell(&mut scope, cell, new_cell, 1); + scope._copy_cell(cell, new_cell, 1); (new_cell, true) } - (false, _) => { + (true, _) => { r_panic!("Cannot copy from {source:#?}, use a drain loop instead") } }; scope.push_instruction(Instruction::OpenLoop(source_cell)); // recurse - let loop_scope = self.compile(&block, Some(&scope))?; - // TODO: refactor, make a function in scope trait to do this automatically - scope - .instructions - .extend(loop_scope.finalise_instructions(true)); + if let Some(block) = block { + let loop_scope = self.create_ir_scope(&block, Some(&scope))?; + // TODO: refactor, make a function in scope trait to do this automatically + scope.instructions.extend(loop_scope.build_ir(true)); + } // copy into each target and decrement the source for target in targets { @@ -402,11 +411,48 @@ impl Compiler<'_> { scope.push_instruction(Instruction::Free(source_cell.memory_id)); } } - Clause::IfElse { - condition, - if_block, - else_block, - } => { + clause @ (Clause::If { + condition: _, + if_block: _, + } + | Clause::IfNot { + condition: _, + if_not_block: _, + } + | Clause::IfElse { + condition: _, + if_block: _, + else_block: _, + } + | Clause::IfNotElse { + condition: _, + if_not_block: _, + else_block: _, + }) => { + // If-else clause types changed recently, so here is a patch to keep the original frontend code: + let (condition, if_block, else_block) = match clause { + Clause::If { + condition, + if_block, + } => (condition, Some(if_block), None), + Clause::IfNot { + condition, + if_not_block, + } => (condition, None, Some(if_not_block)), + Clause::IfElse { + condition, + if_block, + else_block, + } => (condition, Some(if_block), Some(else_block)), + Clause::IfNotElse { + condition, + if_not_block, + else_block, + } => (condition, Some(else_block), Some(if_not_block)), + _ => unreachable!(), + }; + // end patch // + if if_block.is_none() && else_block.is_none() { panic!("Expected block in if/else statement"); }; @@ -442,7 +488,7 @@ impl 
Compiler<'_> { }; // copy the condition expression to the temporary condition cell - _add_expr_to_cell(&mut new_scope, &condition, condition_cell)?; + new_scope._add_expr_to_cell(&condition, condition_cell)?; new_scope.push_instruction(Instruction::OpenLoop(condition_cell)); // TODO: think about optimisations for clearing this variable, as the builder won't shorten it for safety as it doesn't know this loop is special @@ -456,10 +502,8 @@ impl Compiler<'_> { // recursively compile if block if let Some(block) = if_block { - let if_scope = self.compile(&block, Some(&new_scope))?; - new_scope - .instructions - .extend(if_scope.finalise_instructions(true)); + let if_scope = self.create_ir_scope(&block, Some(&new_scope))?; + new_scope.instructions.extend(if_scope.build_ir(true)); }; // close if block @@ -475,64 +519,44 @@ impl Compiler<'_> { // recursively compile else block // TODO: fix this bad practice unwrap let block = else_block.unwrap(); - let else_scope = self.compile(&block, Some(&new_scope))?; - new_scope - .instructions - .extend(else_scope.finalise_instructions(true)); + let else_scope = self.create_ir_scope(&block, Some(&new_scope))?; + new_scope.instructions.extend(else_scope.build_ir(true)); new_scope.push_instruction(Instruction::CloseLoop(cell)); new_scope.push_instruction(Instruction::Free(cell.memory_id)); } // extend the inner scopes instructions onto the outer one - scope - .instructions - .extend(new_scope.finalise_instructions(true)); + scope.instructions.extend(new_scope.build_ir(true)); } Clause::Block(clauses) => { - let new_scope = self.compile(&clauses, Some(&scope))?; - scope - .instructions - .extend(new_scope.finalise_instructions(true)); + let new_scope = self.create_ir_scope(&clauses, Some(&scope))?; + scope.instructions.extend(new_scope.build_ir(true)); } - Clause::InlineBrainfuck { + Clause::Brainfuck { location_specifier, clobbered_variables, operations, } => { // loop through the opcodes - let mut expanded_bf: Vec = Vec::new(); + let 
mut expanded_bf: Vec = Vec::new(); for op in operations { match op { ExtendedOpcode::Block(mm_clauses) => { // create a scope object for functions from the outside scope let functions_scope = scope.open_inner_templates_only(); // compile the block and extend the operations + let instructions = self + .create_ir_scope(&mm_clauses, Some(&functions_scope))? + // compile without cleaning up top level variables, this is the brainfuck programmer's responsibility + .build_ir(false); - let compiler = Compiler { - config: &self.config, - }; - let instructions = compiler - .compile(&mm_clauses, Some(&functions_scope))? - .finalise_instructions(false); - // compile without cleaning up top level variables, this is the brainfuck programmer's responsibility // it is also the brainfuck programmer's responsibility to return to the start position - let builder = Builder { - config: &self.config, - }; - let built_code = builder.build(instructions, true)?; - expanded_bf.extend(built_code); + let bf_code = + self.ir_to_bf(instructions, Some(TC::origin_cell()))?; + expanded_bf.extend(bf_code); } - ExtendedOpcode::Add => expanded_bf.push(Opcode::Add), - ExtendedOpcode::Subtract => expanded_bf.push(Opcode::Subtract), - ExtendedOpcode::Right => expanded_bf.push(Opcode::Right), - ExtendedOpcode::Left => expanded_bf.push(Opcode::Left), - ExtendedOpcode::OpenLoop => expanded_bf.push(Opcode::OpenLoop), - ExtendedOpcode::CloseLoop => expanded_bf.push(Opcode::CloseLoop), - ExtendedOpcode::Output => expanded_bf.push(Opcode::Output), - ExtendedOpcode::Input => expanded_bf.push(Opcode::Input), - ExtendedOpcode::Up => expanded_bf.push(Opcode::Up), - ExtendedOpcode::Down => expanded_bf.push(Opcode::Down), + ExtendedOpcode::Opcode(opcode) => expanded_bf.push(opcode), } } @@ -573,52 +597,57 @@ impl Compiler<'_> { } => { // create variable translations and recursively compile the inner variable block - let calling_argument_types = arguments + // get the calling arguments' types + let 
calling_argument_types: Vec = arguments .iter() - .map(|a| scope.get_target_type(&a)) - .collect::, _>>()?; + .map(|arg| scope.get_expression_type(arg)) + .collect::, String>>()?; + // find the function based on name * types let function_definition = scope.get_function(&function_name, &calling_argument_types)?; + // create mappings in a new translation scope, so mappings will be removed once scope closes let mut argument_translation_scope = scope.open_inner(); - - // TODO: refactor this mess - // deal with argument memory mappings: - for ((calling_argument, calling_argument_type), (arg_name, expected_type)) in - zip( - zip(arguments, calling_argument_types), - function_definition.arguments.iter(), - ) { - // TODO: fix this duplicate call, get_target_type() internally gets the memory allocation details - // then these are gotten again in create_mapped_variable() - r_assert!(calling_argument_type == expected_type, "Expected argument of type \"{expected_type:#?}\" in function call \"{function_name}\", received argument of type \"{calling_argument_type:#?}\". This should not occur"); - // register an argument translation in the scope + assert_eq!(arguments.len(), function_definition.arguments.len()); + for (calling_expr, (arg_name, _)) in + zip(arguments, function_definition.arguments) + { + // TODO: allow expressions as arguments: create a new variable instead of mapping when a value needs to be computed + let calling_arg = match calling_expr { + Expression::VariableReference(var) => var, + expr => r_panic!( + "Expected variable target in function call argument, \ +found expression `{expr}`. General expressions as \ +function arguments are not supported." 
+ ), + }; argument_translation_scope - .create_mapped_variable(arg_name.clone(), &calling_argument)?; + .create_mapped_variable(arg_name, &calling_arg)?; } - // recurse - let function_scope = self.compile( + // recursively compile the function block + let function_scope = self.create_ir_scope( &function_definition.block, Some(&argument_translation_scope), )?; argument_translation_scope .instructions - .extend(function_scope.finalise_instructions(true)); + .extend(function_scope.build_ir(true)); - // extend the inner scope instructions onto the outer scope - // maybe function call compiling should be its own function? + // add the recursively compiled instructions to the current scope's built instructions + // TODO: figure out why this .build_ir() call uses clean_up_variables = false scope .instructions - .extend(argument_translation_scope.finalise_instructions(false)); + .extend(argument_translation_scope.build_ir(false)); } Clause::DefineStruct { name: _, fields: _ } | Clause::DefineFunction { name: _, arguments: _, block: _, - } => unreachable!(), + } + | Clause::None => unreachable!(), } } @@ -626,214 +655,13 @@ impl Compiler<'_> { } } -// not sure if this should be in the scope impl? 
-// helper function for a common use-case -// flatten an expression and add it to a specific cell (using copies and adds, etc) -fn _add_expr_to_cell( - scope: &mut Scope, - expr: &Expression, - cell: CellReference, -) -> Result<(), String> { - let (imm, adds, subs) = expr.flatten()?; - - scope.push_instruction(Instruction::AddToCell(cell.clone(), imm)); - - let mut adds_set = HashMap::new(); - for var in adds { - let n = adds_set.remove(&var).unwrap_or(0); - adds_set.insert(var, n + 1); - } - for var in subs { - let n = adds_set.remove(&var).unwrap_or(0); - adds_set.insert(var, n - 1); - } - - for (source, constant) in adds_set { - let source_cell = scope.get_cell(&source)?; - _copy_cell(scope, source_cell, cell.clone(), constant); - } - - Ok(()) -} - -//This function allows you to add a self referencing expression to the cell -//Separate this to ensure that normal expression don't require the overhead of copying -fn _add_self_referencing_expr_to_cell( - scope: &mut Scope, - expr: Expression, - cell: CellReference, - pre_clear: bool, -) -> Result<(), String> { - //Create a new temp cell to store the current cell value - let temp_mem_id = scope.push_memory_id(); - scope.push_instruction(Instruction::Allocate( - Memory::Cell { id: temp_mem_id }, - None, - )); - let temp_cell = CellReference { - memory_id: temp_mem_id, - index: None, - }; - // TODO: make this more efficent by not requiring a clear cell after, - // i.e. 
simple move instead of copy by default for set operations (instead of +=) - _copy_cell(scope, cell, temp_cell, 1); - // Then if we are doing a += don't pre-clear otherwise Clear the current cell and run the same actions as _add_expr_to_cell - if pre_clear { - scope.push_instruction(Instruction::ClearCell(cell.clone())); - } - - let (imm, adds, subs) = expr.flatten()?; - - scope.push_instruction(Instruction::AddToCell(cell.clone(), imm)); - - let mut adds_set = HashMap::new(); - for var in adds { - let n = adds_set.remove(&var).unwrap_or(0); - adds_set.insert(var, n + 1); - } - for var in subs { - let n = adds_set.remove(&var).unwrap_or(0); - adds_set.insert(var, n - 1); - } - - for (source, constant) in adds_set { - let source_cell = scope.get_cell(&source)?; - //If we have an instance of the original cell being added simply use our temp cell value - // (crucial special sauce) - if source_cell.memory_id == cell.memory_id && source_cell.index == cell.index { - _copy_cell(scope, temp_cell, cell.clone(), constant); - } else { - _copy_cell(scope, source_cell, cell.clone(), constant); - } - } - //Cleanup - scope.push_instruction(Instruction::ClearCell(temp_cell)); - scope.push_instruction(Instruction::Free(temp_mem_id)); - - Ok(()) -} - -/// Helper function to copy a cell from one to another leaving the original unaffected -// TODO: make one for draining a cell -fn _copy_cell( - scope: &mut Scope, - source_cell: CellReference, - target_cell: CellReference, - constant: i32, -) { - if constant == 0 { - return; - } - // allocate a temporary cell - let temp_mem_id = scope.push_memory_id(); - scope.push_instruction(Instruction::Allocate( - Memory::Cell { id: temp_mem_id }, - None, - )); - let temp_cell = CellReference { - memory_id: temp_mem_id, - index: None, - }; - // copy source to target and temp - scope.push_instruction(Instruction::OpenLoop(source_cell)); - scope.push_instruction(Instruction::AddToCell(target_cell, constant as u8)); - 
scope.push_instruction(Instruction::AddToCell(temp_cell, 1)); - scope.push_instruction(Instruction::AddToCell(source_cell, -1i8 as u8)); - scope.push_instruction(Instruction::CloseLoop(source_cell)); - // copy back from temp - scope.push_instruction(Instruction::OpenLoop(temp_cell)); - scope.push_instruction(Instruction::AddToCell(source_cell, 1)); - scope.push_instruction(Instruction::AddToCell(temp_cell, -1i8 as u8)); - scope.push_instruction(Instruction::CloseLoop(temp_cell)); - scope.push_instruction(Instruction::Free(temp_mem_id)); -} - -// this is subject to change -#[derive(Debug, Clone)] -pub enum Instruction { - Allocate(Memory, Option), - Free(MemoryId), // the number indicates which cell in the allocation stack should be freed (cell 0, is the top of the stack, 1 is the second element, etc) - OpenLoop(CellReference), // same with other numbers here, they indicate the cell in the allocation stack to use in the instruction - CloseLoop(CellReference), // pass in the cell id, this originally wasn't there but may be useful later on - AddToCell(CellReference, u8), - InputToCell(CellReference), - ClearCell(CellReference), // not sure if this should be here, seems common enough that it should be - AssertCellValue(CellReference, Option), // allows the user to hand-tune optimisations further - OutputCell(CellReference), - InsertBrainfuckAtCell(Vec, CellLocation), -} - -#[derive(Debug, Clone)] -/// Either a fixed constant cell or a reference to some existing memory -pub enum CellLocation { - Unspecified, - FixedCell((i32, i32)), - MemoryCell(CellReference), -} - -#[derive(Debug, Clone)] -pub enum Memory { - Cell { - id: MemoryId, - }, - Cells { - id: MemoryId, - len: usize, - }, - /// A memory cell that references a previously allocated cell in an outer scope, used for function arguments - MappedCell { - id: MemoryId, - index: Option, - }, - /// Memory mapped cells, referencing previously allocated cells in an outer scope - MappedCells { - id: MemoryId, - 
start_index: usize, - len: usize, - }, - // infinite cell something (TODO?) -} -pub type MemoryId = usize; - -#[derive(Debug, Clone, Copy)] -pub struct CellReference { - pub memory_id: MemoryId, - pub index: Option, -} - -impl Memory { - pub fn id(&self) -> MemoryId { - match self { - Memory::Cell { id } - | Memory::Cells { id, len: _ } - | Memory::MappedCell { id, index: _ } - | Memory::MappedCells { - id, - start_index: _, - len: _, - } => *id, - } - } - pub fn len(&self) -> usize { - match self { - Memory::Cell { id: _ } | Memory::MappedCell { id: _, index: _ } => 1, - Memory::Cells { id: _, len } - | Memory::MappedCells { - id: _, - start_index: _, - len, - } => *len, - } - } -} - #[derive(Clone, Debug)] /// Scope type represents a Mastermind code block, /// any variables or functions defined within a {block} are owned by the scope and cleaned up before continuing -pub struct Scope<'a> { +pub struct ScopeBuilder<'a, TC, OC> { /// a reference to the parent scope, for accessing things defined outside of this scope - outer_scope: Option<&'a Scope<'a>>, - /// fn_only: true if syntactic context instead of normal context. + outer_scope: Option<&'a ScopeBuilder<'a, TC, OC>>, + /// If true, scope is not able to access variables from outer scope. /// Used for embedded mm so that the inner mm can use outer functions but not variables. 
types_only: bool, @@ -844,154 +672,21 @@ pub struct Scope<'a> { variable_memory: HashMap, /// Functions accessible by any code within or in the current scope - functions: Vec<(String, Vec<(String, ValueType)>, Vec)>, + functions: Vec<(String, Vec<(String, ValueType)>, Vec>)>, /// Struct types definitions structs: HashMap, /// Intermediate instructions generated by the compiler - instructions: Vec, -} - -#[derive(Clone, Debug)] // probably shouldn't be cloning here but whatever -struct Function { - arguments: Vec<(String, ValueType)>, - block: Vec, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -/// an absolute definition of a type, as opposed to `VariableTypeReference` which is more of a reference -enum ValueType { - Cell, - Array(usize, Box), - DictStruct(Vec<(String, ValueType, Option)>), - // TupleStruct(Vec), + instructions: Vec>, } -#[derive(Clone, Debug)] -/// equivalent to ValueType::DictStruct enum variant, -/// Rust doesn't support enum variants as types yet so need this workaround for struct definitions in scope object -struct DictStructType(Vec<(String, ValueType, Option)>); -impl ValueType { - fn from_struct(struct_def: DictStructType) -> Self { - ValueType::DictStruct(struct_def.0) - } - - // TODO: make size() and get_and_validate_subfield_cell_map() more efficient, - // currently these two recurse back and forth and are a bit of a monster combo - - /// return the type size in cells - fn size(&self) -> Result { - Ok(match self { - ValueType::Cell => 1, - ValueType::Array(len, value_type) => *len * value_type.size()?, - ValueType::DictStruct(fields) => Self::get_and_validate_subfield_cell_map(fields)?.1, - }) - } - - /// deterministically place all struct subfields on a non-negative cell, return the positions of each and the total length - /// return Err() if location specified subfields overlap - fn get_and_validate_subfield_cell_map( - fields: &Vec<(String, ValueType, Option)>, - ) -> Result<(HashMap<&String, (usize, &ValueType)>, usize), String> { - // 
(set of cells, max cell) - let mut cell_map = HashMap::new(); - - // map of field names and their starting cells - let mut subfield_map = HashMap::new(); - let mut max_cell = 0usize; - let mut unfixed_fields = vec![]; - // handle the cells with specified locations - for (field_name, field_type, field_location) in fields { - match field_location { - Some(location) => { - subfield_map.insert(field_name, (*location, field_type)); - for cell_index in *location..(*location + field_type.size()?) { - // this assumes the field locations have been validated - if let Some(other_name) = cell_map.insert(cell_index, field_name) { - r_panic!( - "Subfields \"{other_name}\" and \"{field_name}\" overlap in struct." - ); - }; - max_cell = max_cell.max(cell_index); - } - } - None => { - unfixed_fields.push((field_name, field_type)); - } - } - } - - for (field_name, field_type) in unfixed_fields { - let field_size = field_type.size()?; - // repeatedly try to insert the fields into leftover memory locations - let mut start_index = 0usize; - for cur_index in 0.. 
{ - if cell_map.contains_key(&cur_index) { - start_index = cur_index + 1; - } else if (cur_index - start_index + 1) >= field_size { - // found a run with the right amount of cells free - break; - } - } - subfield_map.insert(field_name, (start_index, field_type)); - for cell_index in start_index..(start_index + field_size) { - // inefficient but whatever, this insert is not necessary - cell_map.insert(cell_index, field_name); - max_cell = max_cell.max(cell_index); - } - } - - let size = max_cell + 1; - - Ok((subfield_map, size)) - } - - /// get a subfield's type as well as memory cell index - pub fn get_subfield( - &self, - subfield_chain: &VariableTargetReferenceChain, - ) -> Result<(&ValueType, usize), String> { - let mut cur_field = self; - let mut cur_index = 0; - for subfield_ref in subfield_chain.0.iter() { - match (cur_field, subfield_ref) { - (ValueType::Array(len, element_type), Reference::Index(index)) => { - r_assert!( - index < len, - "Index \"{subfield_ref}\" must be less than array length ({len})." - ); - cur_index += element_type.size()? 
* index; - cur_field = element_type; - } - (ValueType::DictStruct(fields), Reference::NamedField(subfield_name)) => { - let (subfield_map, _size) = Self::get_and_validate_subfield_cell_map(fields)?; - let Some((subfield_cell_offset, subfield_type)) = - subfield_map.get(subfield_name) - else { - r_panic!("Could not find subfield \"{subfield_ref}\" in struct type") - }; - cur_index += subfield_cell_offset; - cur_field = subfield_type; - } - - (ValueType::DictStruct(_), Reference::Index(_)) => { - r_panic!("Cannot read index subfield \"{subfield_ref}\" of struct type.") - } - (ValueType::Array(_, _), Reference::NamedField(_)) => { - r_panic!("Cannot read named subfield \"{subfield_ref}\" of array type.") - } - (ValueType::Cell, subfield_ref) => { - r_panic!("Attempted to get subfield \"{subfield_ref}\" of cell type.") - } - } - } - Ok((cur_field, cur_index)) - } -} - -impl Scope<'_> { - pub fn new() -> Scope<'static> { - Scope { +impl ScopeBuilder<'_, TC, OC> +where + TC: Display + Clone, + OC: Clone, +{ + pub fn new() -> ScopeBuilder<'static, TC, OC> { + ScopeBuilder { outer_scope: None, types_only: false, allocations: 0, @@ -1002,9 +697,10 @@ impl Scope<'_> { } } + // regarding `clean_up_variables`: // I don't love this system of deciding what to clean up at the end in this specific function, but I'm not sure what the best way to achieve this would be // this used to be called "get_instructions" but I think this more implies things are being modified - pub fn finalise_instructions(mut self, clean_up_variables: bool) -> Vec { + pub fn build_ir(mut self, clean_up_variables: bool) -> Vec> { if !clean_up_variables { return self.instructions; } @@ -1047,13 +743,13 @@ impl Scope<'_> { self.instructions } - fn push_instruction(&mut self, instruction: Instruction) { + fn push_instruction(&mut self, instruction: Instruction) { self.instructions.push(instruction); } /// Open a scope within the current one, any time there is a {} in Mastermind, this is called - fn 
open_inner(&self) -> Scope { - Scope { + fn open_inner(&self) -> ScopeBuilder { + ScopeBuilder { outer_scope: Some(self), types_only: false, allocations: 0, @@ -1066,8 +762,8 @@ impl Scope<'_> { // syntactic context instead of normal context // used for embedded mm so that the inner mm can use outer functions - fn open_inner_templates_only(&self) -> Scope { - Scope { + fn open_inner_templates_only(&self) -> ScopeBuilder { + ScopeBuilder { outer_scope: Some(self), types_only: true, allocations: 0, @@ -1079,7 +775,7 @@ impl Scope<'_> { } /// Get the correct variable type and allocate the right amount of cells for it - fn allocate_variable(&mut self, var: VariableDefinition) -> Result<&ValueType, String> { + fn allocate_variable(&mut self, var: VariableTypeDefinition) -> Result<&ValueType, String> { r_assert!( !self.variable_memory.contains_key(&var.name), "Cannot allocate variable {var} twice in the same scope" @@ -1144,59 +840,55 @@ impl Scope<'_> { } } + /// find a function definition based on name and argument types (unaffected by the self.fn_only flag) fn get_function( &self, calling_name: &str, - calling_arg_types: &Vec<&ValueType>, - ) -> Result { - // this function is unaffected by the self.fn_only flag - Ok( - if let Some(func) = self.functions.iter().find(|(name, args, _)| { - if name != calling_name || args.len() != calling_arg_types.len() { + calling_arg_types: &Vec, + ) -> Result, String> { + if let Some(func) = self.functions.iter().find(|(name, args, _)| { + if name != calling_name || args.len() != calling_arg_types.len() { + return false; + } + for ((_, arg_type), calling_arg_type) in zip(args, calling_arg_types) { + if *arg_type != *calling_arg_type { return false; } - for ((_, arg_type), calling_arg_type) in zip(args, calling_arg_types) { - if *arg_type != **calling_arg_type { - return false; - } - } - true - }) { - // TODO: stop cloning! 
This function overload stuff is tacked on and needs refactoring - let (_, arguments, block) = func.clone(); - Function { arguments, block } - } else if let Some(outer_scope) = self.outer_scope { - outer_scope.get_function(calling_name, calling_arg_types)? - } else { - r_panic!("Could not find function \"{calling_name}\" with correct arguments in current scope"); - }, - ) + } + true + }) { + // TODO: stop cloning! This function overload stuff is tacked on and needs refactoring + let (_, arguments, block) = func; + return Ok(Function { + arguments: arguments.clone(), + block: block.clone(), + }); + } + + if let Some(outer_scope) = self.outer_scope { + return outer_scope.get_function(calling_name, calling_arg_types); + } + + r_panic!( + "Could not find function \"{calling_name}\" with correct arguments in current scope" + ); } /// Define a struct in this scope fn register_struct_definition( &mut self, struct_name: &str, - fields: Vec, + fields: Vec, ) -> Result<(), String> { let mut absolute_fields = vec![]; - for var_def in fields { - let absolute_type = self.create_absolute_type(&var_def.var_type)?; - let non_neg_location_specifier = match &var_def.location_specifier { - LocationSpecifier::None => None, - LocationSpecifier::Cell(l) => { - // assert the y coordinate is 0 - r_assert!(l.1 == 0, "Struct field location specifiers do not support 2D grid cells: {var_def}"); - r_assert!( - l.0 >= 0, - "Struct field location specifiers must be non-negative: {var_def}" - ); - Some(l.0 as usize) - } - LocationSpecifier::Variable(_) => r_panic!("Location specifiers in struct definitions must be relative, not variables: {var_def}"), - }; - absolute_fields.push((var_def.name, absolute_type, non_neg_location_specifier)); + for field_def in fields { + let absolute_type = self.create_absolute_type(&field_def.field_type)?; + absolute_fields.push(( + field_def.name, + absolute_type, + field_def.location_offset_specifier, + )); } let None = self @@ -1213,10 +905,10 @@ impl Scope<'_> { 
fn register_function_definition( &mut self, new_function_name: &str, - new_arguments: Vec, - new_block: Vec, + new_arguments: Vec>, + new_block: Vec>, ) -> Result<(), String> { - let absolute_arguments = new_arguments + let absolute_arguments: Vec<(String, ValueType)> = new_arguments .into_iter() .map(|f| { let LocationSpecifier::None = f.location_specifier else { @@ -1224,8 +916,9 @@ impl Scope<'_> { }; Ok((f.name, self.create_absolute_type(&f.var_type)?)) }) - .collect::, _>>()?; + .collect::, String>>()?; + // TODO: refactor this: // This is some fucked C-style loop break logic, basically GOTOs // basically it only gets to the panic if the functions have identical signature (except argument names) 'func_loop: for (name, args, _) in self.functions.iter() { @@ -1249,14 +942,14 @@ impl Scope<'_> { /// Recursively find the definition of a struct type by searching up the scope call stack fn get_struct_definition(&self, struct_name: &str) -> Result<&DictStructType, String> { - Ok(if let Some(struct_def) = self.structs.get(struct_name) { - struct_def + if let Some(struct_def) = self.structs.get(struct_name) { + Ok(struct_def) } else if let Some(outer_scope) = self.outer_scope { // recurse - outer_scope.get_struct_definition(struct_name)? 
+ outer_scope.get_struct_definition(struct_name) } else { r_panic!("No definition found for struct \"{struct_name}\"."); - }) + } } /// Construct an absolute type from a type reference @@ -1278,15 +971,15 @@ impl Scope<'_> { // get the absolute type of the variable, as well as the memory allocations let (full_type, memory) = self.get_base_variable_memory(&target.name)?; // get the correct index within the memory and return - Ok(match (&target.subfields, full_type, memory) { - (None, ValueType::Cell, Memory::Cell { id }) => CellReference { + match (&target.subfields, full_type, memory) { + (None, ValueType::Cell, Memory::Cell { id }) => Ok(CellReference { memory_id: *id, index: None, - }, - (None, ValueType::Cell, Memory::MappedCell { id, index }) => CellReference { + }), + (None, ValueType::Cell, Memory::MappedCell { id, index }) => Ok(CellReference { memory_id: *id, index: *index, - }, + }), ( Some(subfield_chain), ValueType::Array(_, _) | ValueType::DictStruct(_), @@ -1302,7 +995,7 @@ impl Scope<'_> { r_panic!("Expected cell type in variable target: {target}"); }; r_assert!(cell_index < *len, "Cell reference out of bounds on variable target: {target}. This should not occur."); - CellReference { + Ok(CellReference { memory_id: *id, index: Some(match memory { Memory::Cells { id: _, len: _ } => cell_index, @@ -1313,7 +1006,7 @@ impl Scope<'_> { } => *start_index + cell_index, _ => unreachable!(), }), - } + }) } // valid states, user error ( @@ -1349,7 +1042,7 @@ impl Scope<'_> { ) => r_panic!( "Invalid memory for value type in target: {target}. This should not occur." 
), - }) + } } /// Return a list of cell references for an array of cells (not an array of structs) @@ -1511,11 +1204,16 @@ impl Scope<'_> { /// Return the absolute type and memory allocation for a variable name fn get_base_variable_memory(&self, var_name: &str) -> Result<(&ValueType, &Memory), String> { - // TODO: add function argument translations and embedded bf/mmi scope function restrictions - match (self.outer_scope, self.variable_memory.get(var_name)) { - (_, Some((value_type, memory))) => Ok((value_type, memory)), - (Some(outer_scope), None) => outer_scope.get_base_variable_memory(var_name), - (None, None) => r_panic!("No variable found with name \"{var_name}\"."), + match ( + self.outer_scope, + self.types_only, + self.variable_memory.get(var_name), + ) { + (_, _, Some((value_type, memory))) => Ok((value_type, memory)), + (Some(outer_scope), false, None) => outer_scope.get_base_variable_memory(var_name), + (None, _, None) | (Some(_), true, None) => { + r_panic!("No variable found in scope with name \"{var_name}\".") + } } } @@ -1531,7 +1229,8 @@ impl Scope<'_> { }) } - /// Create memory mapping between a pre-existing variable and a new one, used for function arguments + /// Create memory mapping between a pre-existing variable and a new one, used for function arguments. + /// This could be used for copy by reference of subfields in future. fn create_mapped_variable( &mut self, mapped_var_name: String, @@ -1625,4 +1324,333 @@ mapping: {mapped_var_name} -> {target}" .insert(mapped_var_name, (var_type.clone(), mapped_memory)); Ok(()) } + + /// Get the final type of an expression. 
+ /// (technically unnecessary right now, but can be used to implement expressions as function arguments in future) + fn get_expression_type(&self, expr: &Expression) -> Result { + Ok(match expr { + Expression::NaturalNumber(_) => ValueType::Cell, + Expression::SumExpression { sign: _, summands } => { + let Some(_) = summands.first() else { + r_panic!( + "Cannot infer expression type because sum \ +expression has no elements: `{expr}`." + ); + }; + // TODO: decide if the summands' types should be verified here or not + for summand in summands { + match self.get_expression_type(summand)? { + ValueType::Cell => (), + summand_type => { + r_panic!( + "Sum expressions must be comprised of cell-types: \ +found `{summand_type}` in `{expr}`" + ); + } + }; + } + ValueType::Cell + } + Expression::VariableReference(var) => self.get_target_type(var)?.clone(), + Expression::ArrayLiteral(elements) => { + let mut elements_iter = elements.iter(); + let Some(first_element) = elements_iter.next() else { + r_panic!( + "Cannot infer expression type because \ +array literal has no elements: `{expr}`." 
+ ); + }; + let first_element_type = self.get_expression_type(first_element)?; + for element in elements_iter { + let element_type = self.get_expression_type(element)?; + r_assert!( + element_type == first_element_type, + "All elements in array expressions must have the \ +same type: found `{element_type}` in `{expr}`" + ); + } + ValueType::Array(elements.len(), Box::new(first_element_type)) + } + Expression::StringLiteral(s) => ValueType::Array(s.len(), Box::new(ValueType::Cell)), + }) + } + + /// helper function for a common use-case: + /// flatten an expression and add it to a specific cell (using copies and adds, etc) + fn _add_expr_to_cell(&mut self, expr: &Expression, cell: CellReference) -> Result<(), String> { + let (imm, adds, subs) = expr.flatten()?; + + self.push_instruction(Instruction::AddToCell(cell.clone(), imm)); + + let mut adds_set = HashMap::new(); + for var in adds { + let n = adds_set.remove(&var).unwrap_or(0); + adds_set.insert(var, n + 1); + } + for var in subs { + let n = adds_set.remove(&var).unwrap_or(0); + adds_set.insert(var, n - 1); + } + + for (source, constant) in adds_set { + let source_cell = self.get_cell(&source)?; + self._copy_cell(source_cell, cell.clone(), constant); + } + + Ok(()) + } + + /// helper function to add a self-referencing expression to a cell + /// this is separated because it requires another copy ontop of normal expressions + // TODO: refactor/fix underlying logic for this + fn _add_self_referencing_expr_to_cell( + &mut self, + expr: Expression, + cell: CellReference, + pre_clear: bool, + ) -> Result<(), String> { + //Create a new temp cell to store the current cell value + let temp_mem_id = self.push_memory_id(); + self.push_instruction(Instruction::Allocate( + Memory::Cell { id: temp_mem_id }, + None, + )); + let temp_cell = CellReference { + memory_id: temp_mem_id, + index: None, + }; + // TODO: make this more efficent by not requiring a clear cell after, + // i.e. 
simple move instead of copy by default for set operations (instead of +=) + self._copy_cell(cell, temp_cell, 1); + // Then if we are doing a += don't pre-clear otherwise Clear the current cell and run the same actions as _add_expr_to_cell + if pre_clear { + self.push_instruction(Instruction::ClearCell(cell.clone())); + } + + let (imm, adds, subs) = expr.flatten()?; + + self.push_instruction(Instruction::AddToCell(cell.clone(), imm)); + + let mut adds_set = HashMap::new(); + for var in adds { + let n = adds_set.remove(&var).unwrap_or(0); + adds_set.insert(var, n + 1); + } + for var in subs { + let n = adds_set.remove(&var).unwrap_or(0); + adds_set.insert(var, n - 1); + } + + for (source, constant) in adds_set { + let source_cell = self.get_cell(&source)?; + //If we have an instance of the original cell being added simply use our temp cell value + // (crucial special sauce) + if source_cell.memory_id == cell.memory_id && source_cell.index == cell.index { + self._copy_cell(temp_cell, cell.clone(), constant); + } else { + self._copy_cell(source_cell, cell.clone(), constant); + } + } + //Cleanup + self.push_instruction(Instruction::ClearCell(temp_cell)); + self.push_instruction(Instruction::Free(temp_mem_id)); + + Ok(()) + } + + /// Helper function to copy a cell from one to another, leaving the original unaffected + // TODO: make one for draining a cell + fn _copy_cell( + &mut self, + source_cell: CellReference, + target_cell: CellReference, + constant: i32, + ) { + if constant == 0 { + return; + } + // allocate a temporary cell + let temp_mem_id = self.push_memory_id(); + self.push_instruction(Instruction::Allocate( + Memory::Cell { id: temp_mem_id }, + None, + )); + let temp_cell = CellReference { + memory_id: temp_mem_id, + index: None, + }; + // copy source to target and temp + self.push_instruction(Instruction::OpenLoop(source_cell)); + self.push_instruction(Instruction::AddToCell(target_cell, constant as u8)); + 
self.push_instruction(Instruction::AddToCell(temp_cell, 1)); + self.push_instruction(Instruction::AddToCell(source_cell, -1i8 as u8)); + self.push_instruction(Instruction::CloseLoop(source_cell)); + // copy back from temp + self.push_instruction(Instruction::OpenLoop(temp_cell)); + self.push_instruction(Instruction::AddToCell(source_cell, 1)); + self.push_instruction(Instruction::AddToCell(temp_cell, -1i8 as u8)); + self.push_instruction(Instruction::CloseLoop(temp_cell)); + self.push_instruction(Instruction::Free(temp_mem_id)); + } +} + +// TODO: think about where to put these tests, and by extension where to put the scopebuilder +#[cfg(test)] +mod scope_builder_tests { + use crate::{ + backend::bf::{Opcode, TapeCell}, + parser::expressions::Sign, + }; + + use super::*; + + #[test] + fn variable_allocation_1() { + let mut scope = ScopeBuilder::::new(); + let allocated_type = scope.allocate_variable(VariableTypeDefinition { + name: String::from("var"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }); + assert_eq!(allocated_type, Ok(&ValueType::Cell)); + } + + #[test] + fn get_expression_type_numbers_1() { + let scope = ScopeBuilder::::new(); + assert_eq!( + scope + .get_expression_type(&Expression::NaturalNumber(0)) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::NaturalNumber(1)) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::NaturalNumber(345678)) + .unwrap(), + ValueType::Cell + ); + } + + #[test] + fn get_expression_type_sums_1() { + let scope = ScopeBuilder::::new(); + assert_eq!( + scope + .get_expression_type(&Expression::SumExpression { + sign: Sign::Positive, + summands: vec![Expression::NaturalNumber(0)] + }) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(345678), + 
Expression::NaturalNumber(2) + ] + }) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(1), + Expression::NaturalNumber(2) + ] + }, + Expression::NaturalNumber(2) + ] + }) + .unwrap(), + ValueType::Cell + ); + } + + #[test] + fn get_expression_type_variables_1() { + let mut scope = ScopeBuilder::::new(); + scope + .allocate_variable(VariableTypeDefinition { + name: String::from("var"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }) + .unwrap(); + assert_eq!( + scope + .get_expression_type(&Expression::VariableReference(VariableTarget { + name: String::from("var"), + subfields: None, + is_spread: false + })) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::VariableReference(VariableTarget { + name: String::from("var"), + subfields: None, + is_spread: false + }), + Expression::NaturalNumber(123) + ] + }) + .unwrap(), + ValueType::Cell + ); + } + + #[test] + fn get_expression_type_arrays_1() { + let mut scope = ScopeBuilder::::new(); + scope + .allocate_variable(VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 3), + location_specifier: LocationSpecifier::None, + }) + .unwrap(); + assert_eq!( + scope + .get_expression_type(&Expression::VariableReference(VariableTarget { + name: String::from("arr"), + subfields: None, + is_spread: false + })) + .unwrap(), + ValueType::Array(3, Box::new(ValueType::Cell)) + ); + assert_eq!( + scope + .get_expression_type(&Expression::VariableReference(VariableTarget { + name: String::from("arr"), + subfields: Some(VariableTargetReferenceChain(vec![Reference::Index(0)])), + is_spread: false + })) + 
.unwrap(), + ValueType::Cell + ); + } + + // TODO: make failure tests for expression types } diff --git a/compiler/src/frontend/mod.rs b/compiler/src/frontend/mod.rs new file mode 100644 index 0000000..c4ea8c8 --- /dev/null +++ b/compiler/src/frontend/mod.rs @@ -0,0 +1,2 @@ +pub mod frontend; +pub mod types; diff --git a/compiler/src/frontend/types.rs b/compiler/src/frontend/types.rs new file mode 100644 index 0000000..1550192 --- /dev/null +++ b/compiler/src/frontend/types.rs @@ -0,0 +1,248 @@ +use crate::{ + macros::macros::*, + parser::types::{Clause, Reference, VariableTargetReferenceChain}, +}; +use std::collections::HashMap; + +#[derive(Debug, Clone)] +pub enum Instruction { + Allocate(Memory, Option), + Free(MemoryId), // the number indicates which cell in the allocation stack should be freed (cell 0, is the top of the stack, 1 is the second element, etc) + OpenLoop(CellReference), // same with other numbers here, they indicate the cell in the allocation stack to use in the instruction + CloseLoop(CellReference), // pass in the cell id, this originally wasn't there but may be useful later on + AddToCell(CellReference, u8), + InputToCell(CellReference), + ClearCell(CellReference), // not sure if this should be here, seems common enough that it should be + AssertCellValue(CellReference, Option), // allows the user to hand-tune optimisations further + OutputCell(CellReference), + InsertBrainfuckAtCell(Vec, CellLocation), +} + +#[derive(Debug, Clone)] +/// Either a fixed constant cell or a reference to some existing memory +pub enum CellLocation { + Unspecified, + FixedCell(TC), + MemoryCell(CellReference), +} + +#[derive(Debug, Clone)] +pub enum Memory { + Cell { + id: MemoryId, + }, + Cells { + id: MemoryId, + len: usize, + }, + /// A memory cell that references a previously allocated cell in an outer scope, used for function arguments + MappedCell { + id: MemoryId, + index: Option, + }, + /// Memory mapped cells, referencing previously allocated cells in an 
outer scope + MappedCells { + id: MemoryId, + start_index: usize, + len: usize, + }, + // infinite cell something (TODO?) +} +pub type MemoryId = usize; + +#[derive(Debug, Clone, Copy)] +pub struct CellReference { + pub memory_id: MemoryId, + pub index: Option, +} + +impl Memory { + pub fn id(&self) -> MemoryId { + match self { + Memory::Cell { id } + | Memory::Cells { id, len: _ } + | Memory::MappedCell { id, index: _ } + | Memory::MappedCells { + id, + start_index: _, + len: _, + } => *id, + } + } + pub fn len(&self) -> usize { + match self { + Memory::Cell { id: _ } | Memory::MappedCell { id: _, index: _ } => 1, + Memory::Cells { id: _, len } + | Memory::MappedCells { + id: _, + start_index: _, + len, + } => *len, + } + } +} + +#[derive(Clone, Debug)] // probably shouldn't be cloning here but whatever +pub struct Function { + pub arguments: Vec<(String, ValueType)>, + pub block: Vec>, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +/// an absolute definition of a type, as opposed to `VariableTypeReference` which is more of a reference +pub enum ValueType { + Cell, + Array(usize, Box), + DictStruct(Vec<(String, ValueType, Option)>), + // TupleStruct(Vec), +} + +#[derive(Clone, Debug)] +/// equivalent to ValueType::DictStruct enum variant, +/// Rust doesn't support enum variants as types yet so need this workaround for struct definitions in scope object +pub struct DictStructType(pub Vec<(String, ValueType, Option)>); +impl ValueType { + pub fn from_struct(struct_def: DictStructType) -> Self { + ValueType::DictStruct(struct_def.0) + } + + // TODO: make size() and get_and_validate_subfield_cell_map() more efficient, + // currently these two recurse back and forth and are a bit of a monster combo + + /// return the type size in cells + pub fn size(&self) -> Result { + Ok(match self { + ValueType::Cell => 1, + ValueType::Array(len, value_type) => *len * value_type.size()?, + ValueType::DictStruct(fields) => Self::get_and_validate_subfield_cell_map(fields)?.1, + }) + } 
+ + /// deterministically place all struct subfields on a non-negative cell, return the positions of each and the total length + /// return Err() if location specified subfields overlap + pub fn get_and_validate_subfield_cell_map( + fields: &Vec<(String, ValueType, Option)>, + ) -> Result<(HashMap<&String, (usize, &ValueType)>, usize), String> { + // (set of cells, max cell) + let mut cell_map = HashMap::new(); + + // map of field names and their starting cells + let mut subfield_map = HashMap::new(); + let mut max_cell = 0usize; + let mut unfixed_fields = vec![]; + // handle the cells with specified locations + for (field_name, field_type, field_location) in fields { + match field_location { + Some(location) => { + subfield_map.insert(field_name, (*location, field_type)); + for cell_index in *location..(*location + field_type.size()?) { + // this assumes the field locations have been validated + if let Some(other_name) = cell_map.insert(cell_index, field_name) { + r_panic!( + "Subfields \"{other_name}\" and \"{field_name}\" overlap in struct." + ); + }; + max_cell = max_cell.max(cell_index); + } + } + None => { + unfixed_fields.push((field_name, field_type)); + } + } + } + + for (field_name, field_type) in unfixed_fields { + let field_size = field_type.size()?; + // repeatedly try to insert the fields into leftover memory locations + let mut start_index = 0usize; + for cur_index in 0.. 
{ + if cell_map.contains_key(&cur_index) { + start_index = cur_index + 1; + } else if (cur_index - start_index + 1) >= field_size { + // found a run with the right amount of cells free + break; + } + } + subfield_map.insert(field_name, (start_index, field_type)); + for cell_index in start_index..(start_index + field_size) { + // inefficient but whatever, this insert is not necessary + cell_map.insert(cell_index, field_name); + max_cell = max_cell.max(cell_index); + } + } + + let size = max_cell + 1; + + Ok((subfield_map, size)) + } + + /// get a subfield's type as well as memory cell index + pub fn get_subfield( + &self, + subfield_chain: &VariableTargetReferenceChain, + ) -> Result<(&ValueType, usize), String> { + let mut cur_field = self; + let mut cur_index = 0; + for subfield_ref in subfield_chain.0.iter() { + match (cur_field, subfield_ref) { + (ValueType::Array(len, element_type), Reference::Index(index)) => { + r_assert!( + index < len, + "Index \"{subfield_ref}\" must be less than array length ({len})." + ); + cur_index += element_type.size()? 
* index; + cur_field = element_type; + } + (ValueType::DictStruct(fields), Reference::NamedField(subfield_name)) => { + let (subfield_map, _size) = Self::get_and_validate_subfield_cell_map(fields)?; + let Some((subfield_cell_offset, subfield_type)) = + subfield_map.get(subfield_name) + else { + r_panic!("Could not find subfield \"{subfield_ref}\" in struct type") + }; + cur_index += subfield_cell_offset; + cur_field = subfield_type; + } + + (ValueType::DictStruct(_), Reference::Index(_)) => { + r_panic!("Cannot read index subfield \"{subfield_ref}\" of struct type.") + } + (ValueType::Array(_, _), Reference::NamedField(_)) => { + r_panic!("Cannot read named subfield \"{subfield_ref}\" of array type.") + } + (ValueType::Cell, subfield_ref) => { + r_panic!("Attempted to get subfield \"{subfield_ref}\" of cell type.") + } + } + } + Ok((cur_field, cur_index)) + } +} + +impl std::fmt::Display for ValueType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ValueType::Cell => { + f.write_str("cell")?; + } + ValueType::Array(length, element_type) => { + f.write_fmt(format_args!("{element_type}[{length}]"))?; + } + ValueType::DictStruct(fields) => { + f.write_str("{")?; + let fields_len = fields.len(); + for (i, (field_name, field_type, offset)) in fields.iter().enumerate() { + f.write_fmt(format_args!("{field_type} {field_name}"))?; + if let Some(offset) = offset { + f.write_fmt(format_args!(" @{offset}"))?; + } + f.write_str(";")?; + if i < (fields_len - 1) { + f.write_str(" ")?; + } + } + } + } + Ok(()) + } +} diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 955aea5..66dfd01 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,39 +1,37 @@ #![allow(dead_code)] +// dead code is allowed because we have two different compile targets (wasm and command-line) -mod macros; - -// allowing dead code because we have two different compile targets (wasm and command-line) +// project dependencies: +mod backend; mod 
brainfuck; mod brainfuck_optimiser; -mod builder; -mod compiler; -mod constants_optimiser; +mod frontend; +mod macros; mod misc; mod parser; mod preprocessor; -mod tokeniser; - -use brainfuck::{BVMConfig, BVM}; -use brainfuck_optimiser::optimise; -use builder::{BrainfuckOpcodes, Builder}; -use compiler::Compiler; -use misc::MastermindConfig; -use parser::parse; -use preprocessor::preprocess_from_memory; -use tokeniser::tokenise; +mod tests; +use crate::{ + backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, + common::BrainfuckProgram, + }, + brainfuck::{BrainfuckConfig, BrainfuckContext}, + misc::MastermindContext, + parser::parser::parse_program, + preprocessor::{preprocess_from_memory, strip_comments}, +}; +// stdlib dependencies: use std::collections::HashMap; +// external dependencies: use wasm_bindgen::{prelude::wasm_bindgen, JsValue}; -// copied from rustwasm.github.io pub fn set_panic_hook() { - // When the `console_error_panic_hook` feature is enabled, we can call the - // `set_panic_hook` function at least once during initialization, and then - // we will get better error messages if our code ever panics. 
- // - // For more details see - // https://github.com/rustwasm/console_error_panic_hook#readme + // copied from rustwasm.github.io + // https://github.com/rustwasm/console_error_panic_hook #[cfg(feature = "console_error_panic_hook")] console_error_panic_hook::set_once(); } @@ -48,20 +46,29 @@ pub fn wasm_compile( let file_contents: HashMap = serde_wasm_bindgen::from_value(file_contents).unwrap(); - let config: MastermindConfig = serde_wasm_bindgen::from_value(config).unwrap(); - let compiler = Compiler { config: &config }; - let builder = Builder { config: &config }; + let ctx = MastermindContext { + config: serde_wasm_bindgen::from_value(config).unwrap(), + }; let preprocessed_file = preprocess_from_memory(&file_contents, entry_file_name)?; - let tokens = tokenise(&preprocessed_file)?; - let parsed = parse(&tokens)?; - let instructions = compiler.compile(&parsed, None)?; - let bf_code = builder.build(instructions.finalise_instructions(false), false)?; + let stripped_file = strip_comments(&preprocessed_file); + if ctx.config.enable_2d_grid { + let parsed_syntax = parse_program::(&stripped_file)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + let bf_code = ctx.ir_to_bf(instructions, None)?; + Ok(bf_code.to_string()) + } else { + let parsed_syntax = parse_program::(&stripped_file)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + let bf_code = ctx.ir_to_bf(instructions, None)?; + Ok(bf_code.to_string()) + } - Ok(match config.optimise_generated_code { - true => optimise(bf_code, config.optimise_generated_all_permutations).to_string(), - false => bf_code.to_string(), - }) + // TODO: fix optimisations + // Ok(match ctx.config.optimise_generated_code { + // true => ctx.optimise_bf_code(bf_code).to_string(), + // false => bf_code.to_string(), + // }) } #[wasm_bindgen] @@ -73,14 +80,16 @@ pub async fn wasm_run_bf( ) -> Result { set_panic_hook(); - let config = BVMConfig { - enable_debug_symbols: 
false, - enable_2d_grid: enable_2d_grid, + let ctx = BrainfuckContext { + config: BrainfuckConfig { + enable_debug_symbols: false, + enable_2d_grid: enable_2d_grid, + }, }; - let mut bf = BVM::new(config, code.chars().collect()); - // hack, TODO: refactor - let r = bf.run_async(output_callback, input_callback).await?; + let r = ctx + .run_async(code.chars().collect(), output_callback, input_callback) + .await?; Ok(r) } diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 9b38a0f..685201f 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -1,31 +1,33 @@ #![allow(dead_code)] - +// dead code is allowed because we have two different compile targets (wasm and command-line) + +// project dependencies: +mod backend; +mod brainfuck; +mod brainfuck_optimiser; +mod frontend; +#[macro_use] mod macros; - -// Stages: (rust format has jumbled these) -mod brainfuck; // 6. Run -mod brainfuck_optimiser; // 5. Post-Optimise -mod builder; // 4. Build (and pre-optimise) -mod compiler; // 3. Compile -mod constants_optimiser; // a component of 4 -mod parser; // 2. Parse -mod preprocessor; // 0. Preprocess includes and macro-type stuff -mod tokeniser; // 1. 
Tokenise - mod misc; +mod parser; +mod preprocessor; mod tests; - -use brainfuck::{BVMConfig, BVM}; -use brainfuck_optimiser::optimise; -use builder::{BrainfuckOpcodes, Builder}; -use compiler::Compiler; -use misc::MastermindConfig; -use parser::parse; -use preprocessor::preprocess; -use tokeniser::tokenise; - +use crate::{ + backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, + common::BrainfuckProgram, + }, + brainfuck::{BrainfuckConfig, BrainfuckContext}, + misc::{MastermindConfig, MastermindContext}, + parser::parser::parse_program, + preprocessor::{preprocess, strip_comments}, +}; + +// stdlib dependencies: use std::io::{stdin, stdout, Cursor}; +// external dependencies: use clap::Parser; #[derive(Parser, Default, Debug)] @@ -70,67 +72,74 @@ struct Arguments { } fn main() -> Result<(), String> { + // TODO: clean up this crazy file, this was the first ever rust I wrote and it's messy std::env::set_var("RUST_BACKTRACE", "1"); let args = Arguments::parse(); - let config = MastermindConfig::new(args.optimise); + let ctx = MastermindContext { + // TODO: change this to not be a bitmask, or at least document it + config: MastermindConfig::new(args.optimise), + }; - let program; - match args.file { + let program = match args.file { Some(file) => { let file_path = std::path::PathBuf::from(file); // c-style preprocessor (includes and maybe some simple conditionals to avoid double includes) - program = preprocess(file_path); - } - None => { - program = args.program.unwrap(); + preprocess(file_path) } + None => args.program.unwrap(), }; let bf_program = match args.compile { true => { + let stripped_program = strip_comments(&program); // compile the provided file - - let tokens = tokenise(&program)?; - // parse tokens into syntax tree - let clauses = parse(&tokens)?; - // compile syntax tree into brainfuck - - // 2 stage compilation step, first stage compiles syntax tree into low-level instructions - // second stage actually writes out the low-level 
instructions into brainfuck - - let compiler = Compiler { config: &config }; - let instructions = compiler - .compile(&clauses, None)? - .finalise_instructions(false); - - let builder = Builder { config: &config }; - let bf_program = builder.build(instructions, false)?; - - match config.optimise_generated_code { - true => { - optimise(bf_program, config.optimise_generated_all_permutations).to_string() - } - false => bf_program.to_string(), + if ctx.config.enable_2d_grid { + let parsed_syntax = parse_program::(&stripped_program)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + let bf_code = ctx.ir_to_bf(instructions, None)?; + bf_code.to_string() + } else { + let parsed_syntax = parse_program::(&stripped_program)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + let bf_code = ctx.ir_to_bf(instructions, None)?; + bf_code.to_string() } + + // TODO: fix optimisations + // match ctx.config.optimise_generated_code { + // true => ctx.optimise_bf_code(bf_code).to_string(), + // false => bf_code.to_string(), + // } } false => program, }; if args.run || !args.compile { // run brainfuck - let config = BVMConfig { - enable_debug_symbols: false, - enable_2d_grid: false, + let ctx = BrainfuckContext { + config: BrainfuckConfig { + enable_debug_symbols: false, + enable_2d_grid: false, + }, }; - let mut bvm = BVM::new(config, bf_program.chars().collect()); if args.input.is_some() { - bvm.run(&mut Cursor::new(args.input.unwrap()), &mut stdout(), None)?; + ctx.run( + bf_program.chars().collect(), + &mut Cursor::new(args.input.unwrap()), + &mut stdout(), + None, + )?; } else { - bvm.run(&mut stdin(), &mut stdout(), None)?; + ctx.run( + bf_program.chars().collect(), + &mut stdin(), + &mut stdout(), + None, + )?; } } else { print!("{bf_program}"); diff --git a/compiler/src/mastermind_optimiser.rs b/compiler/src/mastermind_optimiser.rs deleted file mode 100644 index bbb70b8..0000000 --- 
a/compiler/src/mastermind_optimiser.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Here's the brief: -// your task is to take in a list of clauses, and output an optimised version -// the main functionality will be finding when variables are actually used and minimising their lifetime to reduce allocation time - -// the secondary but also very important task is to use the above variable lifetimes to use compiler construction variants that do not unnecessarily copy variables around -// maybe this second point should be in the compiler, not here diff --git a/compiler/src/misc.rs b/compiler/src/misc.rs index b61e3bf..bdbf7c2 100644 --- a/compiler/src/misc.rs +++ b/compiler/src/misc.rs @@ -1,17 +1,14 @@ -#[derive(serde::Deserialize)] +#[derive(Clone, serde::Deserialize)] pub struct MastermindConfig { // basic pure brainfuck optimisations pub optimise_generated_code: bool, + // TODO: rename this: (turn on exhaustive search for solving 2D brainfuck optimisation) pub optimise_generated_all_permutations: bool, + // track cell value and clear with constant addition if possible pub optimise_cell_clearing: bool, // track cell value and skip loops which can never be entered pub optimise_unreachable_loops: bool, - // TODO: prune variables that aren't needed? Maybe combine with empty blocks stuff - pub optimise_variable_usage: bool, - // TODO: optimise memory layout to minimise tape head movement - // recommended to turn on these next two together - pub optimise_memory_allocation: bool, // golf constants, useful for single characters or large numbers // probably not great with strings yet, may need another optimisation for that pub optimise_constants: bool, @@ -25,17 +22,39 @@ pub struct MastermindConfig { // '2D Mastermind - Nearest' 3 pub memory_allocation_method: u8, pub enable_2d_grid: bool, + // TODO: prune variables that aren't needed? 
Maybe combine with empty blocks stuff + // pub optimise_variable_usage: bool, + // recommended to turn on these next two together + // pub optimise_memory_allocation: bool, +} + +impl Default for MastermindConfig { + fn default() -> MastermindConfig { + MastermindConfig { + optimise_generated_code: false, + optimise_generated_all_permutations: false, + optimise_cell_clearing: false, + optimise_unreachable_loops: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, + optimise_constants: false, + optimise_empty_blocks: false, + memory_allocation_method: 0, + enable_2d_grid: false, + } + } } impl MastermindConfig { + // TODO: rethink this bitmask thing pub fn new(optimise_bitmask: usize) -> MastermindConfig { MastermindConfig { optimise_generated_code: (optimise_bitmask & 0b00000001) > 0, optimise_generated_all_permutations: (optimise_bitmask & 0b00001000) > 0, optimise_cell_clearing: (optimise_bitmask & 0b00000010) > 0, optimise_unreachable_loops: (optimise_bitmask & 0b00000100) > 0, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 0, @@ -43,3 +62,7 @@ impl MastermindConfig { } } } + +pub struct MastermindContext { + pub config: MastermindConfig, +} diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs deleted file mode 100644 index 4b8b809..0000000 --- a/compiler/src/parser.rs +++ /dev/null @@ -1,1433 +0,0 @@ -use crate::{ - builder::TapeCell, - macros::macros::{r_assert, r_panic}, - tokeniser::Token, -}; -use std::{fmt::Display, mem::discriminant, num::Wrapping}; - -// recursive function to create a tree representation of the program -pub fn parse(tokens: &[Token]) -> Result, String> { - // basic steps: - // chew off tokens from the front, recursively parse blocks of tokens - let mut clauses: Vec = Vec::new(); - let mut i = 0usize; - while let 
Some(clause) = get_clause_tokens(&tokens[i..])? { - match ( - &clause[0], - &clause.get(1).unwrap_or(&Token::None), - &clause.get(2).unwrap_or(&Token::None), - ) { - (Token::Cell, _, _) - | (Token::Struct, Token::Name(_), Token::Name(_) | Token::OpenSquareBracket) => { - clauses.push(parse_let_clause(clause)?); - } - (Token::Struct, Token::Name(_), Token::OpenBrace) => { - clauses.push(parse_struct_clause(clause)?); - } - (Token::Plus, Token::Plus, _) | (Token::Minus, Token::Minus, _) => { - clauses.push(parse_increment_clause(clause)?); - } - (Token::Name(_), Token::EqualsSign | Token::Dot | Token::OpenSquareBracket, _) => { - clauses.extend(parse_set_clause(clause)?); - } - (Token::Drain, _, _) => { - clauses.push(parse_drain_copy_clause(clause, true)?); - } - (Token::Copy, _, _) => { - clauses.push(parse_drain_copy_clause(clause, false)?); - } - (Token::While, _, _) => { - clauses.push(parse_while_clause(clause)?); - } - (Token::Output, _, _) => { - clauses.push(parse_output_clause(clause)?); - } - (Token::Input, _, _) => { - clauses.push(parse_input_clause(clause)?); - } - (Token::Name(_), Token::OpenParenthesis, _) => { - clauses.push(parse_function_call_clause(clause)?); - } - (Token::Fn, _, _) => { - clauses.push(parse_function_definition_clause(clause)?); - } - (Token::Name(_), Token::Plus | Token::Minus, Token::EqualsSign) => { - clauses.extend(parse_add_clause(clause)?); - } - (Token::If, _, _) => { - clauses.push(parse_if_else_clause(clause)?); - } - (Token::OpenBrace, _, _) => { - let braced_tokens = get_braced_tokens(clause, BRACES)?; - let inner_clauses = parse(braced_tokens)?; - clauses.push(Clause::Block(inner_clauses)); - } - (Token::Bf, _, _) => { - clauses.push(parse_brainfuck_clause(clause)?); - } - (Token::Assert, _, _) => clauses.push(parse_assert_clause(clause)?), - // empty clause - (Token::Semicolon, _, _) => (), - // the None token usually represents whitespace, it should be filtered out before reaching this function - // Wrote out all of 
these possibilities so that the compiler will tell me when I haven't implemented a token - ( - Token::None - | Token::Else - | Token::Not - | Token::ClosingBrace - | Token::OpenSquareBracket - | Token::ClosingSquareBracket - | Token::OpenParenthesis - | Token::ClosingParenthesis - | Token::LessThan - | Token::MoreThan - | Token::Comma - | Token::Plus - | Token::Minus - | Token::Into - | Token::Digits(_) - | Token::Name(_) - | Token::String(_) - | Token::Character(_) - | Token::True - | Token::False - | Token::EqualsSign - | Token::Asterisk - | Token::Clobbers - | Token::Equals - | Token::Unknown - | Token::Dot - | Token::At - | Token::Struct - | Token::UpToken, - _, - _, - ) => r_panic!("Invalid clause: {clause:#?}"), - }; - i += clause.len(); - } - - Ok(clauses) -} - -fn parse_let_clause(clause: &[Token]) -> Result { - // cell x = 0; - // struct DummyStruct y - let mut i = 0usize; - // this kind of logic could probably be done with iterators, (TODO for future refactors) - - let (var, len) = parse_var_definition(&clause[i..], true)?; - i += len; - - if let Token::EqualsSign = &clause[i] { - i += 1; - let remaining = &clause[i..(clause.len() - 1)]; - let expr = Expression::parse(remaining)?; - // equivalent to set clause stuff - // except we need to convert a variable definition to a variable target - Ok(Clause::DefineVariable { var, value: expr }) - } else if i < (clause.len() - 1) { - r_panic!("Invalid token in let clause: {clause:#?}"); - } else { - Ok(Clause::DeclareVariable { var }) - } -} - -/// Parse tokens representing a struct definition into a clause -fn parse_struct_clause(clause: &[Token]) -> Result { - let mut i = 0usize; - let Token::Struct = &clause[i] else { - r_panic!("Expected struct keyword in struct clause. This should never occur. {clause:#?}"); - }; - i += 1; - - let Token::Name(struct_name) = &clause[i] else { - r_panic!("Expected identifier in struct clause. This should never occur. 
{clause:#?}"); - }; - i += 1; - - let Token::OpenBrace = &clause[i] else { - r_panic!("Expected open brace in struct clause: {clause:#?}"); - }; - let braced_tokens = get_braced_tokens(&clause[i..], BRACES)?; - - let mut fields = vec![]; - - let mut j = 0usize; - loop { - let (field, len) = parse_var_definition(&braced_tokens[j..], true)?; - j += len; - fields.push(field); - r_assert!( - j <= braced_tokens.len(), - "Struct definition field exceeded braces. This should never occur. {clause:#?}" - ); - let Token::Semicolon = &braced_tokens[j] else { - r_panic!("Expected semicolon in struct definition field: {clause:#?}"); - }; - j += 1; - if j == braced_tokens.len() { - break; - } - } - r_assert!( - j == braced_tokens.len(), - "Struct definitions exceeded braces. This should never occur. {clause:#?}" - ); - // i += j + 2; - - Ok(Clause::DefineStruct { - name: struct_name.clone(), - fields, - }) -} - -fn parse_add_clause(clause: &[Token]) -> Result, String> { - let mut clauses: Vec = Vec::new(); - let mut i = 0usize; - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - let positive = match &clause[i] { - Token::Plus => true, - Token::Minus => false, - _ => { - r_panic!("Unexpected second token in add clause: {clause:#?}"); - } - }; - i += 2; // assume the equals sign is there because it was checked by the main loop - let raw_expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - let expr = match positive { - true => raw_expr, - false => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![raw_expr], - }, - }; - //Check if this add clause self references - let self_referencing = expr.check_self_referencing(&var); - - clauses.push(Clause::AddToVariable { - var, - value: expr, - self_referencing: self_referencing, - }); - - Ok(clauses) -} - -// currently just syntax sugar, should make it actually do post/pre increments -fn parse_increment_clause(clause: &[Token]) -> Result { - let (var, _) = parse_var_target(&clause[2..])?; - //An 
increment clause can never be self referencing since it just VAR++ - Ok(match (&clause[0], &clause[1]) { - (Token::Plus, Token::Plus) => Clause::AddToVariable { - var, - value: Expression::NaturalNumber(1), - self_referencing: false, - }, - (Token::Minus, Token::Minus) => Clause::AddToVariable { - var, - value: Expression::NaturalNumber((-1i8 as u8) as usize), - self_referencing: false, - }, - _ => { - r_panic!("Invalid pattern in increment clause: {clause:#?}"); - } - }) - // assumed that the final token is a semicolon -} - -fn parse_set_clause(clause: &[Token]) -> Result, String> { - // TODO: what do we do about arrays and strings and structs? - let mut clauses: Vec = Vec::new(); - let mut i = 0usize; - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - // definitely could use iterators instead (TODO for refactor) - match &clause[i] { - Token::EqualsSign => { - i += 1; - let expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - let self_referencing = expr.check_self_referencing(&var); - clauses.push(Clause::SetVariable { - var, - value: expr, - self_referencing, - }); - } - Token::Plus | Token::Minus => { - let is_add = if let Token::Plus = &clause[i] { - true - } else { - false - }; - i += 1; - let Token::EqualsSign = &clause[i] else { - r_panic!("Expected equals sign in add-assign operator: {clause:#?}"); - }; - i += 1; - - let mut expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - if !is_add { - expr = expr.flipped_sign()?; - } - - let self_referencing = expr.check_self_referencing(&var); - clauses.push(Clause::AddToVariable { - var, - value: expr, - self_referencing, - }); - } - _ => r_panic!("Expected assignment operator in set clause: {clause:#?}"), - } - - Ok(clauses) -} - -fn parse_drain_copy_clause(clause: &[Token], is_draining: bool) -> Result { - // drain g {i += 1;}; - // drain g into j; - // copy foo into bar {g += 2; etc;}; - // TODO: make a tuple-parsing function and use it here instead of a space seperated list of 
targets - - let mut targets = Vec::new(); - let mut block: Vec = Vec::new(); - let mut i = 1usize; - - let condition_start_token = i; - - i += 1; - while let Some(token) = clause.get(i) { - if let Token::Into | Token::OpenBrace | Token::Semicolon = token { - break; - } - i += 1; - } - r_assert!( - i < clause.len(), - "Expected source expression in draining/copying loop: {clause:#?}" - ); - - let source = Expression::parse(&clause[condition_start_token..i])?; - - if let Token::Into = &clause[i] { - // simple drain/copy move operations - i += 1; - - loop { - match &clause[i] { - Token::Name(_) | Token::Asterisk => { - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - targets.push(var); - } - Token::OpenBrace | Token::Semicolon => { - break; - } - _ => { - r_panic!("Unexpected token in drain clause: {clause:#?}"); - } - } - } - } - - if let Token::OpenBrace = &clause[i] { - // code block to execute at each loop iteration - let braced_tokens = get_braced_tokens(&clause[i..], BRACES)?; - // recursion - block.extend(parse(braced_tokens)?); - // i += 2 + braced_tokens.len(); - } - - Ok(Clause::CopyLoop { - source, - targets, - block, - is_draining, - }) -} - -fn parse_while_clause(clause: &[Token]) -> Result { - // TODO: make this able to accept expressions - let mut i = 1usize; - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - // loop { - // if let Token::OpenBrace = &clause[i] { - // break; - // }; - // i += 1; - // } - - // let expr = parse_expression(&clause[1..i]); - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - // i += 2 + block_tokens.len(); - - Ok(Clause::WhileLoop { - var, - block: parse(block_tokens)?, - }) -} - -fn parse_if_else_clause(clause: &[Token]) -> Result { - // skip first token, assumed to start with if - let mut i = 1usize; - let mut not = false; - if let Token::Not = &clause[i] { - not = true; - i += 1; - } - - let condition_start_token = i; - - i += 1; - while let Some(token) = clause.get(i) { - if let 
Token::OpenBrace = token { - break; - } - i += 1; - } - r_assert!( - i < clause.len(), - "Expected condition and block in if statement: {clause:#?}" - ); - - let condition = Expression::parse(&clause[condition_start_token..i])?; - - let block_one: Vec = { - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - i += 2 + block_tokens.len(); - parse(block_tokens)? - }; - - let block_two: Option> = if let Some(Token::Else) = &clause.get(i) { - i += 1; - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - // i += 2 + block_tokens.len(); - Some(parse(block_tokens)?) - } else { - None - }; - - Ok(match (not, block_one, block_two) { - (false, block_one, block_two) => Clause::IfElse { - condition, - if_block: Some(block_one), - else_block: block_two, - }, - (true, block_one, block_two) => Clause::IfElse { - condition, - if_block: block_two, - else_block: Some(block_one), - }, - }) -} - -fn parse_output_clause(clause: &[Token]) -> Result { - let mut i = 1usize; - - let expr_tokens = &clause[i..(clause.len() - 1)]; - let expr = Expression::parse(expr_tokens)?; - i += expr_tokens.len(); - - let Token::Semicolon = &clause[i] else { - r_panic!("Invalid token at end of output clause: {clause:#?}"); - }; - - Ok(Clause::OutputValue { value: expr }) -} - -fn parse_input_clause(clause: &[Token]) -> Result { - let mut i = 1usize; - - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - let Token::Semicolon = &clause[i] else { - r_panic!("Invalid token at end of input clause: {clause:#?}"); - }; - - Ok(Clause::InputVariable { var }) -} - -fn parse_assert_clause(clause: &[Token]) -> Result { - let mut i = 1usize; - - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - if let Token::Unknown = &clause[i] { - Ok(Clause::AssertVariableValue { var, value: None }) - } else { - let Token::Equals = &clause[i] else { - r_panic!("Expected assertion value in assert clause: {clause:#?}"); - }; - i += 1; - - let Token::Semicolon = &clause[clause.len() - 
1] else { - r_panic!("Invalid token at end of assert clause: {clause:#?}"); - }; - - let remaining = &clause[i..(clause.len() - 1)]; - let expr = Expression::parse(remaining)?; - - Ok(Clause::AssertVariableValue { - var, - value: Some(expr), - }) - } -} - -// parse any memory location specifiers -// let g @4,2 = 68; -// or -// let p @3 = 68; -fn parse_location_specifier(tokens: &[Token]) -> Result<(LocationSpecifier, usize), String> { - if tokens.len() == 0 { - return Ok((LocationSpecifier::None, 0)); - } - if let Token::At = &tokens[0] { - let mut i = 1; - - match &tokens[i] { - Token::Digits(_) | Token::Minus => { - let x_offset = { - let mut positive = true; - if let Token::Minus = &tokens[i] { - i += 1; - positive = false; - } - let Token::Digits(raw) = &tokens[i] else { - r_panic!( - "Expected number after \"-\" in memory location specifier: {tokens:#?}" - ); - }; - i += 1; - - // TODO: error handling - let mut offset: i32 = raw.parse().unwrap(); - if !positive { - offset = -offset; - } - offset - }; - - let y_offset = { - if let Token::Comma = &tokens[i] { - i += 1; - let mut positive = true; - if let Token::Minus = &tokens[i] { - i += 1; - positive = false; - } - let Token::Digits(raw) = &tokens[i] else { - r_panic!( - "Expected number after \"-\" in memory location specifier: {tokens:#?}" - ); - }; - i += 1; - - // TODO: error handling - let mut offset: i32 = raw.parse().unwrap(); - if !positive { - offset = -offset; - } - offset - } else { - 0 - } - }; - - return Ok((LocationSpecifier::Cell((x_offset, y_offset)), i)); - } - Token::Name(_) => { - // variable location specifier - let (var, len) = parse_var_target(&tokens[i..])?; - i += len; - - return Ok((LocationSpecifier::Variable(var), i)); - } - _ => r_panic!("Expected constant or variable in location specifier: {tokens:#?}"), - } - } - - Ok((LocationSpecifier::None, 0)) -} - -fn parse_brainfuck_clause(clause: &[Token]) -> Result { - // bf {++--<><} - // bf @3 {++--<><} - // bf clobbers var1 var2 
{++--<><} - // bf @2 clobbers *arr {++--<><} - - let mut clobbers = Vec::new(); - let mut i = 1usize; - - // check for location specifier - let (mem_offset, len) = parse_location_specifier(&clause[i..])?; - i += len; - - if let Token::Clobbers = &clause[i] { - i += 1; - - loop { - match &clause[i] { - Token::Name(_) | Token::Asterisk => { - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - clobbers.push(var); - } - Token::OpenBrace => { - break; - } - _ => { - r_panic!("Unexpected token in drain clause: {clause:#?}"); - } - } - } - } - - let bf_tokens = get_braced_tokens(&clause[i..], BRACES)?; - let mut ops = Vec::new(); - let mut j = 0; - while j < bf_tokens.len() { - // TODO: support embedded mastermind in the embedded brainfuck - // TODO: combine [-] into clear opcodes - match &bf_tokens[j] { - Token::Plus => ops.push(ExtendedOpcode::Add), - Token::Minus => ops.push(ExtendedOpcode::Subtract), - Token::MoreThan => ops.push(ExtendedOpcode::Right), - Token::LessThan => ops.push(ExtendedOpcode::Left), - Token::UpToken => ops.push(ExtendedOpcode::Up), - Token::OpenSquareBracket => ops.push(ExtendedOpcode::OpenLoop), - Token::ClosingSquareBracket => ops.push(ExtendedOpcode::CloseLoop), - Token::Dot => ops.push(ExtendedOpcode::Output), - Token::Comma => ops.push(ExtendedOpcode::Input), - Token::Name(s) => { - for c in s.chars() { - if c == 'v' { - ops.push(ExtendedOpcode::Down); - } else { - panic!("Invalid Inline Brainfuck Characters in {s}"); - } - } - } - Token::OpenBrace => { - // embedded mastermind - let block_tokens = get_braced_tokens(&bf_tokens[j..], BRACES)?; - let clauses = parse(block_tokens)?; - ops.push(ExtendedOpcode::Block(clauses)); - j += block_tokens.len() + 1; - } - // not sure whether to panic here or do nothing - _ => (), - } - j += 1; - } - - Ok(Clause::InlineBrainfuck { - location_specifier: mem_offset, - clobbered_variables: clobbers, - operations: ops, - }) -} - -fn parse_function_definition_clause(clause: &[Token]) -> Result { 
- let mut i = 1usize; - // function name - let Token::Name(name) = &clause[i] else { - r_panic!("Expected function name after in function definition clause: {clause:#?}"); - }; - let mut args = Vec::new(); - i += 1; - let Token::OpenParenthesis = &clause[i] else { - r_panic!("Expected argument list in function definition clause: {clause:#?}"); - }; - let arg_tokens = get_braced_tokens(&clause[i..], PARENTHESES)?; - let mut j = 0usize; - // parse function argument names - while j < arg_tokens.len() { - // break if no more arguments - let (Token::Cell | Token::Struct) = &arg_tokens[j] else { - break; - }; - let (var, len) = parse_var_definition(&arg_tokens[j..], false)?; - j += len; - - args.push(var); - - if j >= arg_tokens.len() { - break; - } else if let Token::Comma = &arg_tokens[j] { - j += 1; - } else { - r_panic!("Unexpected token in function definition arguments: {arg_tokens:#?}"); - } - } - - i += 2 + arg_tokens.len(); - - // recursively parse the inner block - let Token::OpenBrace = &clause[i] else { - r_panic!("Expected execution block in function definition: {clause:#?}"); - }; - - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - let parsed_block: Vec = parse(block_tokens)?; - - Ok(Clause::DefineFunction { - name: name.clone(), - arguments: args, - block: parsed_block, - }) -} - -fn parse_function_call_clause(clause: &[Token]) -> Result { - let mut i = 0usize; - // Okay I didn't know this rust syntax, could have used it all over the place - let Token::Name(name) = &clause[i] else { - r_panic!("Expected function identifier at start of function call clause: {clause:#?}"); - }; - let mut args = Vec::new(); - i += 1; - - let Token::OpenParenthesis = &clause[i] else { - r_panic!("Expected argument list in function call clause: {clause:#?}"); - }; - let arg_tokens = get_braced_tokens(&clause[i..], PARENTHESES)?; - - let mut j = 0usize; - while j < arg_tokens.len() { - // this used to be in the while condition but moved it here to check for the 
case of no arguments - let Token::Name(_) = &arg_tokens[j] else { - break; - }; - let (var, len) = parse_var_target(&arg_tokens[j..])?; - j += len; - - args.push(var); - - if j >= arg_tokens.len() { - break; - } else if let Token::Comma = &arg_tokens[j] { - j += 1; - } else { - r_panic!("Unexpected token in function call arguments: {arg_tokens:#?}"); - } - } - - i += 2 + arg_tokens.len(); - - let Token::Semicolon = &clause[i] else { - r_panic!("Expected clause delimiter at end of function call clause: {clause:#?}"); - }; - - Ok(Clause::CallFunction { - function_name: name.clone(), - arguments: args, - }) -} - -fn parse_var_target(tokens: &[Token]) -> Result<(VariableTarget, usize), String> { - let mut i = 0usize; - let is_spread = if let Token::Asterisk = &tokens[i] { - i += 1; - true - } else { - false - }; - - let Token::Name(var_name) = &tokens[i] else { - r_panic!("Expected identifier in variable target identifier: {tokens:#?}"); - }; - i += 1; - - let mut ref_chain = vec![]; - while i < tokens.len() { - match &tokens[i] { - Token::OpenSquareBracket => { - let (index, tokens_used) = parse_subscript(&tokens[i..])?; - i += tokens_used; - ref_chain.push(Reference::Index(index)); - } - Token::Dot => { - i += 1; - let Token::Name(subfield_name) = &tokens[i] else { - r_panic!("Expected subfield name in variable target identifier: {tokens:#?}"); - }; - i += 1; - - ref_chain.push(Reference::NamedField(subfield_name.clone())); - } - _ => { - break; - } - } - } - - Ok(( - VariableTarget { - name: var_name.clone(), - subfields: if ref_chain.len() > 0 { - Some(VariableTargetReferenceChain(ref_chain)) - } else { - None - }, - is_spread, - }, - i, - )) -} - -/// convert tokens of a variable definition into data representation, e.g. 
`cell x`, `struct G g`, `cell[5] x_arr`, `struct H[100] hs` -fn parse_var_definition( - tokens: &[Token], - allow_location: bool, -) -> Result<(VariableDefinition, usize), String> { - let mut i = 0usize; - let mut var_type = match &tokens[i] { - Token::Cell => { - i += 1; - - VariableTypeReference::Cell - } - Token::Struct => { - i += 1; - - let Token::Name(struct_name) = &tokens[i] else { - r_panic!("Expected struct type name in variable definition: {tokens:#?}"); - }; - i += 1; - - VariableTypeReference::Struct(struct_name.clone()) - } - _ => { - r_panic!("Unexpected token in variable definition, this should not occur: {tokens:#?}") - } - }; - - // parse array specifiers - while let Token::OpenSquareBracket = &tokens[i] { - let (len, j) = parse_array_length(&tokens[i..])?; - i += j; - - var_type = VariableTypeReference::Array(Box::new(var_type), len); - } - - let Token::Name(var_name) = &tokens[i] else { - r_panic!("Expected identifier in variable definition: {tokens:#?}"); - }; - i += 1; - - let (location_specifier, len) = parse_location_specifier(&tokens[i..])?; - - r_assert!( - location_specifier.is_none() || allow_location, - "Unexpected location specifier in variable definition: {tokens:#?}" - ); - i += len; - - Ok(( - VariableDefinition { - var_type, - name: var_name.clone(), - location_specifier, - }, - i, - )) -} - -/// parse the subscript of an array variable, e.g. [4] [6] -/// must be compile-time constant -/// returns (array length, tokens used) -/// assumes the first token is an open square bracket -fn parse_subscript(tokens: &[Token]) -> Result<(usize, usize), String> { - let mut i = 0usize; - let subscript = get_braced_tokens(&tokens[i..], SQUARE_BRACKETS)?; - let Expression::NaturalNumber(len) = Expression::parse(subscript)? 
else { - r_panic!("Expected a compile-time constant in subscript: {tokens:#?}"); - }; - - i += 2 + subscript.len(); - - Ok((len, i)) -} - -/// parse_array_subscript but with a length check -fn parse_array_length(tokens: &[Token]) -> Result<(usize, usize), String> { - let (len, i) = parse_subscript(tokens)?; - r_assert!(len > 0, "Array variable cannot be zero-length: {tokens:#?}"); - Ok((len, i)) -} - -// get a clause, typically a line, bounded by ; -fn get_clause_tokens(tokens: &[Token]) -> Result, String> { - if tokens.len() < 2 { - Ok(None) - } else { - let mut i = 0usize; - while i < tokens.len() { - match tokens[i] { - Token::OpenBrace => { - let braced_block = get_braced_tokens(&tokens[i..], BRACES)?; - i += 2 + braced_block.len(); - // handle blocks marking the end of clauses, if/else being the exception - if i < tokens.len() { - if let Token::Else = tokens[i] { - i += 1; - let else_block = get_braced_tokens(&tokens[i..], BRACES)?; - i += 2 + else_block.len(); - } - } - return Ok(Some(&tokens[..i])); - } - Token::Semicolon => { - i += 1; - return Ok(Some(&tokens[..i])); - } - _ => { - i += 1; - } - } - } - - r_panic!("No clause could be found in: {tokens:#?}"); - } -} - -const SQUARE_BRACKETS: (Token, Token) = (Token::OpenSquareBracket, Token::ClosingSquareBracket); -const BRACES: (Token, Token) = (Token::OpenBrace, Token::ClosingBrace); -const PARENTHESES: (Token, Token) = (Token::OpenParenthesis, Token::ClosingParenthesis); -const ANGLED_BRACKETS: (Token, Token) = (Token::LessThan, Token::MoreThan); -// this should be a generic function but rust doesn't support enum variants as type arguments yet -// find tokens bounded by matching brackets -// TODO: make an impl for &[Token] and put all these functions in it -fn get_braced_tokens(tokens: &[Token], braces: (Token, Token)) -> Result<&[Token], String> { - let (open_brace, closing_brace) = (discriminant(&braces.0), discriminant(&braces.1)); - // find corresponding bracket, the depth check is unnecessary but 
whatever - let len = { - let mut i = 1usize; - let mut depth = 1; - while i < tokens.len() && depth > 0 { - let g = discriminant(&tokens[i]); - if g == open_brace { - depth += 1; - } else if g == closing_brace { - depth -= 1; - } - i += 1; - } - i - }; - - if len >= 2 { - if open_brace == discriminant(&tokens[0]) && closing_brace == discriminant(&tokens[len - 1]) - { - return Ok(&tokens[1..(len - 1)]); - } - } - r_panic!("Invalid braced tokens: {tokens:#?}"); -} - -impl Expression { - // Iterators? - // TODO: support post/pre increment in expressions - fn parse(tokens: &[Token]) -> Result { - let mut i = 0usize; - - if let Token::String(s) = &tokens[i] { - i += 1; - r_assert!( - i == tokens.len(), - "Expected semicolon after string literal {tokens:#?}" - ); - return Ok(Expression::StringLiteral(s.clone())); - } - - if let Token::OpenSquareBracket = &tokens[i] { - let braced_tokens = get_braced_tokens(&tokens[i..], SQUARE_BRACKETS)?; - i += 2 + braced_tokens.len(); - r_assert!( - i == tokens.len(), - "Expected semicolon after array literal {tokens:#?}" - ); - // parse the array - let results: Result, String> = braced_tokens - .split(|t| if let Token::Comma = t { true } else { false }) - .map(Self::parse) - .collect(); - // TODO: why do I need to split collect result into a seperate variable like here? 
- return Ok(Expression::ArrayLiteral(results?)); - } - - let mut current_sign = Some(Sign::Positive); // by default the first summand is positive - let mut summands = Vec::new(); - while i < tokens.len() { - match (¤t_sign, &tokens[i]) { - (None, Token::Plus) => { - current_sign = Some(Sign::Positive); - i += 1; - } - (None, Token::Minus) => { - current_sign = Some(Sign::Negative); - i += 1; - } - (Some(Sign::Positive), Token::Minus) => { - current_sign = Some(Sign::Negative); - i += 1; - } - (Some(Sign::Negative), Token::Minus) => { - current_sign = Some(Sign::Positive); - i += 1; - } - (Some(sign), Token::Digits(literal)) => { - let parsed_int: usize = literal.parse().unwrap(); - i += 1; - match sign { - Sign::Positive => summands.push(Expression::NaturalNumber(parsed_int)), - Sign::Negative => summands.push(Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(parsed_int)], - }), - } - current_sign = None; - } - (Some(sign), Token::True | Token::False) => { - let parsed_int = match &tokens[i] { - Token::True => 1, - Token::False => 0, - _ => r_panic!( - "Unreachable error occured while parsing boolean value: {tokens:#?}" - ), - }; - i += 1; - match sign { - Sign::Positive => summands.push(Expression::NaturalNumber(parsed_int)), - Sign::Negative => summands.push(Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(parsed_int)], - }), - } - current_sign = None; - } - (Some(sign), Token::Character(chr)) => { - let chr_int: usize = *chr as usize; - - r_assert!( - chr_int < 0xff, - "Character tokens must be single-byte: {chr}" - ); - - i += 1; - match sign { - Sign::Positive => summands.push(Expression::NaturalNumber(chr_int)), - Sign::Negative => summands.push(Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(chr_int)], - }), - } - current_sign = None; - } - (Some(sign), Token::Name(_) | Token::Asterisk) => { - let (var, len) = 
parse_var_target(&tokens[i..])?; - i += len; - match sign { - Sign::Positive => summands.push(Expression::VariableReference(var)), - Sign::Negative => summands.push(Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::VariableReference(var)], - }), - } - current_sign = None; - } - (Some(sign), Token::OpenParenthesis) => { - let braced_tokens = get_braced_tokens(&tokens[i..], PARENTHESES)?; - i += 2 + braced_tokens.len(); - let braced_expr = Self::parse(braced_tokens)?; - // probably inefficent but everything needs to be flattened at some point anyway so won't matter - // TODO: make expression structure more efficient (don't use vectors every time there is a negative) - summands.push(match (sign, braced_expr.clone()) { - ( - Sign::Negative, - Expression::NaturalNumber(_) | Expression::VariableReference(_), - ) => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![braced_expr], - }, - ( - Sign::Negative, - Expression::SumExpression { - sign: Sign::Negative, - summands, - }, - ) => Expression::SumExpression { - sign: Sign::Positive, - summands, - }, - ( - Sign::Negative, - Expression::SumExpression { - sign: Sign::Positive, - summands, - }, - ) => Expression::SumExpression { - sign: Sign::Negative, - summands, - }, - _ => braced_expr, - }); - current_sign = None; - } - _ => { - r_panic!( - "Unexpected token {:#?} found in expression: {tokens:#?}", - tokens[i] - ); - } - } - } - - match summands.len() { - 1 => Ok(summands.into_iter().next().unwrap()), - 1.. 
=> Ok(Expression::SumExpression { - sign: Sign::Positive, - summands, - }), - _ => r_panic!("Expected value in expression: {tokens:#?}"), - } - } - - /// flip the sign of an expression, equivalent to `x => -(x)` - pub fn flipped_sign(self) -> Result { - Ok(match self { - Expression::SumExpression { sign, summands } => Expression::SumExpression { - sign: sign.flipped(), - summands, - }, - Expression::NaturalNumber(_) | Expression::VariableReference(_) => { - Expression::SumExpression { - sign: Sign::Negative, - summands: vec![self], - } - } - Expression::ArrayLiteral(_) | Expression::StringLiteral(_) => { - r_panic!( - "Attempted to invert sign of array or string literal, \ - do not use += or -= on arrays or strings." - ); - } - }) - } - - // not sure if this is the compiler's concern or if it should be the parser - // (constant to add, variables to add, variables to subtract) - // currently multiplication is not supported so order of operations and flattening is very trivial - // If we add multiplication in future it will likely be constant multiplication only, so no variable on variable multiplication - pub fn flatten(&self) -> Result<(u8, Vec, Vec), String> { - let expr = self; - let mut imm_sum = Wrapping(0u8); - let mut additions = Vec::new(); - let mut subtractions = Vec::new(); - - match expr { - Expression::SumExpression { sign, summands } => { - let results: Result, Vec)>, String> = - summands.into_iter().map(|expr| expr.flatten()).collect(); - let flattened = results? 
- .into_iter() - .reduce(|acc, (imm, adds, subs)| { - ( - (Wrapping(acc.0) + Wrapping(imm)).0, - [acc.1, adds].concat(), - [acc.2, subs].concat(), - ) - }) - .unwrap_or((0, vec![], vec![])); - - match sign { - Sign::Positive => { - imm_sum += flattened.0; - additions.extend(flattened.1); - subtractions.extend(flattened.2); - } - Sign::Negative => { - imm_sum -= flattened.0; - subtractions.extend(flattened.1); - additions.extend(flattened.2); - } - }; - } - Expression::NaturalNumber(number) => { - imm_sum += Wrapping(*number as u8); - } - Expression::VariableReference(var) => { - additions.push(var.clone()); - } - Expression::ArrayLiteral(_) | Expression::StringLiteral(_) => { - r_panic!("Attempt to flatten an array-like expression: {expr:#?}"); - } - } - - Ok((imm_sum.0, additions, subtractions)) - } - - //Recursively Check If This Is Self Referencing - pub fn check_self_referencing(&self, parent: &VariableTarget) -> bool { - // TODO: make sure nested values work correctly - match self { - Expression::SumExpression { - sign: _sign, - summands, - } => summands - .iter() - .any(|summand| summand.check_self_referencing(parent)), - Expression::VariableReference(var) => *var == *parent, - Expression::ArrayLiteral(_) - | Expression::StringLiteral(_) - | Expression::NaturalNumber(_) => false, - } - } -} - -// TODO: add multiplication -// yes, but no variable * variable multiplication or division -#[derive(Debug, Clone)] -pub enum Expression { - SumExpression { - sign: Sign, - summands: Vec, - }, - NaturalNumber(usize), - VariableReference(VariableTarget), - ArrayLiteral(Vec), - StringLiteral(String), -} - -#[derive(Debug, Clone)] -pub enum Sign { - Positive, - Negative, -} -impl Sign { - fn flipped(self) -> Sign { - match self { - Sign::Positive => Sign::Negative, - Sign::Negative => Sign::Positive, - } - } -} - -#[derive(Debug, Clone)] -pub enum Clause { - DeclareVariable { - var: VariableDefinition, - }, - DefineVariable { - var: VariableDefinition, - value: Expression, 
- }, - DefineStruct { - name: String, - fields: Vec, - }, - AddToVariable { - var: VariableTarget, - value: Expression, - self_referencing: bool, - }, - SetVariable { - var: VariableTarget, - value: Expression, - self_referencing: bool, - }, - AssertVariableValue { - var: VariableTarget, - // Some(constant) indicates we know the value, None indicates we don't know the value - // typically will either be used for assert unknown or assert 0 - value: Option, - }, - CopyLoop { - source: Expression, - targets: Vec, - block: Vec, - is_draining: bool, - }, - WhileLoop { - var: VariableTarget, - block: Vec, - }, - OutputValue { - value: Expression, - }, - InputVariable { - var: VariableTarget, - }, - DefineFunction { - name: String, - arguments: Vec, - block: Vec, - }, - CallFunction { - function_name: String, - arguments: Vec, - }, - IfElse { - condition: Expression, - if_block: Option>, - else_block: Option>, - }, - Block(Vec), - InlineBrainfuck { - location_specifier: LocationSpecifier, - clobbered_variables: Vec, - // TODO: make this support embedded mastermind - operations: Vec, - }, -} - -// extended brainfuck opcodes to include mastermind code blocks -#[derive(Debug, Clone)] -pub enum ExtendedOpcode { - Add, - Subtract, - Right, - Left, - OpenLoop, - CloseLoop, - Output, - Input, - Block(Vec), - Up, - Down, -} - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -/// the type of a variable according to the user, not validated yet as the parser does not keep track of types -// maybe it should keep track of types? 
-pub enum VariableTypeReference { - Cell, - Struct(String), - Array(Box, usize), -} - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum LocationSpecifier { - None, - Cell(TapeCell), - Variable(VariableTarget), -} -impl LocationSpecifier { - fn is_none(&self) -> bool { - match self { - LocationSpecifier::None => true, - _ => false, - } - } -} - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct VariableDefinition { - pub name: String, - pub var_type: VariableTypeReference, - pub location_specifier: LocationSpecifier, - // Infinite {name: String, pattern: ???}, -} - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum Reference { - NamedField(String), - Index(usize), -} - -/// Represents a list of subfield references after the `.` or `[x]` operators, e.g. `obj.h[6]` would have `['h', '[6]']` -// a bit verbose, not quite sure about this -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct VariableTargetReferenceChain(pub Vec); -/// Represents a target variable in an expression, this has no type informatino -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct VariableTarget { - pub name: String, - pub subfields: Option, - pub is_spread: bool, -} -impl VariableTarget { - /// converts a definition to a target for use with definition clauses (as opposed to declarations) - pub fn from_definition(var_def: &VariableDefinition) -> Self { - VariableTarget { - name: var_def.name.clone(), - subfields: None, - is_spread: false, - } - } -} - -impl Display for VariableTypeReference { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match &self { - VariableTypeReference::Cell => f.write_str(&format!("cell")), - VariableTypeReference::Struct(struct_name) => { - f.write_str(&format!("struct {struct_name}")) - } - VariableTypeReference::Array(element_type, len) => { - f.write_str(&format!("{element_type}[{len}]")) - } - } - } -} - -impl Display for VariableDefinition { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result 
{ - f.write_str(&format!("{} {}", self.var_type, self.name))?; - match &self.location_specifier { - LocationSpecifier::Cell(_) | LocationSpecifier::Variable(_) => { - f.write_str(&format!(" {}", self.location_specifier))? - } - LocationSpecifier::None => (), - } - - Ok(()) - } -} - -impl Display for LocationSpecifier { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("@")?; - match self { - LocationSpecifier::Cell(cell) => f.write_str(&format!("{:?}", cell))?, - LocationSpecifier::Variable(var) => f.write_str(&format!("{}", var))?, - LocationSpecifier::None => (), - } - - Ok(()) - } -} - -impl Display for Reference { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Reference::NamedField(subfield_name) => f.write_str(&format!(".{subfield_name}"))?, - Reference::Index(index) => f.write_str(&format!("[{index}]"))?, - } - - Ok(()) - } -} - -impl Display for VariableTarget { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.is_spread { - f.write_str("*")?; - } - f.write_str(&self.name)?; - if let Some(subfield_refs) = &self.subfields { - for ref_step in subfield_refs.0.iter() { - f.write_str(&format!("{ref_step}"))?; - } - } - - Ok(()) - } -} diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs new file mode 100644 index 0000000..e36c8df --- /dev/null +++ b/compiler/src/parser/expressions.rs @@ -0,0 +1,368 @@ +use super::{ + parser::parse_var_target, + tokens::{next_token, Token}, + types::VariableTarget, +}; +use crate::macros::macros::{r_assert, r_panic}; + +use itertools::Itertools; +use std::num::Wrapping; + +// TODO: simplify expression data structure for negative sums of single values +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Expression { + SumExpression { + sign: Sign, + summands: Vec, + }, + NaturalNumber(usize), + VariableReference(VariableTarget), + ArrayLiteral(Vec), + StringLiteral(String), 
+} + +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Sign { + Positive, + Negative, +} +impl Sign { + fn flipped(self) -> Sign { + match self { + Sign::Positive => Sign::Negative, + Sign::Negative => Sign::Positive, + } + } +} + +impl Expression { + // Iterators? + // TODO: support post/pre increment in expressions + pub fn parse(chars: &mut &[char]) -> Result { + // parse string expressions + { + let mut s = *chars; + if let Token::String(literal) = next_token(&mut s)? { + *chars = s; + let (Token::RightParenthesis + | Token::Semicolon + | Token::Comma + | Token::LeftBrace + | Token::Into + | Token::RightSquareBracket + | Token::None) = next_token(&mut s)? + else { + // TODO: add source snippet + r_panic!("String literal must entirely comprise expression."); + }; + return Ok(Expression::StringLiteral(literal)); + } + } + + // parse array expressions + { + let mut s = *chars; + if let Ok(Token::LeftSquareBracket) = next_token(&mut s) { + *chars = s; + let mut expressions = vec![]; + loop { + let mut s = *chars; + match (expressions.is_empty(), next_token(&mut s)?) 
{ + (_, Token::RightSquareBracket) => { + *chars = s; + break; + } + (_, Token::Comma) => { + *chars = s; + expressions.push(Self::parse(chars)?); + } + (true, _) => expressions.push(Self::parse(chars)?), + _ => unreachable!(), + } + } + + // check for delimiters + { + let mut s = *chars; + let Ok( + Token::Semicolon + | Token::Comma + | Token::RightParenthesis + | Token::RightSquareBracket + | Token::Into + | Token::None, + ) = next_token(&mut s) + else { + // TODO: add source snippet + r_panic!("Array literal must entirely comprise expression."); + }; + } + return Ok(Expression::ArrayLiteral(expressions)); + } + } + + // parse arithmetic or variable expressions + // this loop is basically a state machine based on the current sign: + let mut current_sign = Some(Sign::Positive); // by default the first summand is positive + let mut summands = Vec::new(); + loop { + let mut s = *chars; + match (¤t_sign, next_token(&mut s)?) { + (None, Token::Plus) => { + *chars = s; + current_sign = Some(Sign::Positive); + } + (None, Token::Minus) => { + *chars = s; + current_sign = Some(Sign::Negative); + } + (Some(Sign::Positive), Token::Minus) => { + *chars = s; + current_sign = Some(Sign::Negative); + } + (Some(Sign::Negative), Token::Minus) => { + *chars = s; + current_sign = Some(Sign::Positive); + } + ( + Some(sign), + token @ (Token::Number(_) | Token::Character(_) | Token::True | Token::False), + ) => { + *chars = s; + let parsed_int = match token { + Token::Number(number) => number, + Token::Character(c) => { + let chr_int = c as usize; + r_assert!(chr_int < 0xff, "Character tokens must be single-byte: {c}"); + chr_int + } + Token::True => 1, + Token::False => 0, + _ => unreachable!(), + }; + summands.push(match sign { + Sign::Positive => Expression::NaturalNumber(parsed_int), + Sign::Negative => Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(parsed_int)], + }, + }); + current_sign = None; + } + (Some(sign), Token::Name(_) | 
Token::Asterisk) => { + let var = parse_var_target(chars)?; + summands.push(match sign { + Sign::Positive => Expression::VariableReference(var), + Sign::Negative => Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::VariableReference(var)], + }, + }); + current_sign = None; + } + (Some(sign), Token::LeftParenthesis) => { + *chars = s; + let inner_expr = Self::parse(chars)?; + // probably inefficent but everything needs to be flattened at some point anyway so won't matter + // TODO: make expression structure more efficient (don't use vectors every time there is a negative) + summands.push(match (sign, inner_expr.clone()) { + ( + Sign::Negative, + Expression::NaturalNumber(_) | Expression::VariableReference(_), + ) => Expression::SumExpression { + sign: Sign::Negative, + summands: vec![inner_expr], + }, + ( + Sign::Negative, + Expression::SumExpression { + sign: Sign::Negative, + summands, + }, + ) => Expression::SumExpression { + sign: Sign::Positive, + summands, + }, + ( + Sign::Negative, + Expression::SumExpression { + sign: Sign::Positive, + summands, + }, + ) => Expression::SumExpression { + sign: Sign::Negative, + summands, + }, + _ => inner_expr, + }); + let Token::RightParenthesis = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected `)` after inner expression."); + }; + current_sign = None; + } + // TODO: add delimiters here: `)` `;` `,` `{` `into` + ( + sign, + Token::RightParenthesis + | Token::RightSquareBracket + | Token::Semicolon + | Token::Comma + | Token::LeftBrace + | Token::Into + | Token::None, + ) => { + r_assert!(sign.is_none(), "Expected more terms in expression."); + break; + } + // TODO: add source snippet + (_, token) => r_panic!("Unexpected token `{token}` found in expression."), + } + } + + Ok(match summands.len() { + 1 => summands.into_iter().next().unwrap(), + 1.. 
=> Expression::SumExpression { + sign: Sign::Positive, + summands, + }, + // TODO: add source snippet + _ => r_panic!("Expected value in expression."), + }) + } + + /// flip the sign of an expression, equivalent to `x => -(x)` + pub fn flipped_sign(self) -> Result { + Ok(match self { + Expression::SumExpression { sign, summands } => Expression::SumExpression { + sign: sign.flipped(), + summands, + }, + Expression::NaturalNumber(_) | Expression::VariableReference(_) => { + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![self], + } + } + Expression::ArrayLiteral(_) | Expression::StringLiteral(_) => { + r_panic!( + "Attempted to invert sign of array or string literal, \ + do not use += or -= on arrays or strings." + ); + } + }) + } + + // not sure if this is the compiler's concern or if it should be the parser + // (constant to add, variables to add, variables to subtract) + // currently multiplication is not supported so order of operations and flattening is very trivial + // If we add multiplication in future it will likely be constant multiplication only, so no variable on variable multiplication + pub fn flatten(&self) -> Result<(u8, Vec, Vec), String> { + let expr = self; + let mut imm_sum = Wrapping(0u8); + let mut additions = Vec::new(); + let mut subtractions = Vec::new(); + + match expr { + Expression::SumExpression { sign, summands } => { + let results: Result, Vec)>, String> = + summands.into_iter().map(|expr| expr.flatten()).collect(); + let flattened = results? 
+ .into_iter() + .reduce(|acc, (imm, adds, subs)| { + ( + (Wrapping(acc.0) + Wrapping(imm)).0, + [acc.1, adds].concat(), + [acc.2, subs].concat(), + ) + }) + .unwrap_or((0, vec![], vec![])); + + match sign { + Sign::Positive => { + imm_sum += flattened.0; + additions.extend(flattened.1); + subtractions.extend(flattened.2); + } + Sign::Negative => { + imm_sum -= flattened.0; + subtractions.extend(flattened.1); + additions.extend(flattened.2); + } + }; + } + Expression::NaturalNumber(number) => { + imm_sum += Wrapping(*number as u8); + } + Expression::VariableReference(var) => { + additions.push(var.clone()); + } + Expression::ArrayLiteral(_) | Expression::StringLiteral(_) => { + r_panic!("Attempt to flatten an array-like expression: {expr:#?}"); + } + } + + Ok((imm_sum.0, additions, subtractions)) + } + + //Recursively Check If This Is Self Referencing + pub fn check_self_referencing(&self, parent: &VariableTarget) -> bool { + // TODO: make sure nested values work correctly + match self { + Expression::SumExpression { + sign: _sign, + summands, + } => summands + .iter() + .any(|summand| summand.check_self_referencing(parent)), + Expression::VariableReference(var) => *var == *parent, + Expression::ArrayLiteral(_) + | Expression::StringLiteral(_) + | Expression::NaturalNumber(_) => false, + } + } +} + +impl std::fmt::Display for Expression { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Expression::SumExpression { sign, summands } => { + if let Sign::Negative = sign { + f.write_str("- ")?; + } + f.write_str("(")?; + + let mut summands_iter = summands.iter(); + // TODO: refactor to remove the need for this + if let Some(first_expr) = summands_iter.next() { + f.write_fmt(format_args!("{first_expr}"))?; + for expr in summands_iter { + f.write_str(" ")?; + match expr { + Expression::SumExpression { + sign: Sign::Negative, + summands: _, + } => (), + _ => f.write_str("+ ")?, + } + f.write_fmt(format_args!("{expr}"))?; + } + } + + 
f.write_str(")")?; + } + Expression::NaturalNumber(number) => f.write_fmt(format_args!("{number}"))?, + Expression::VariableReference(variable_target) => { + f.write_fmt(format_args!("{variable_target}"))? + } + Expression::ArrayLiteral(expressions) => { + f.write_fmt(format_args!("[{}]", expressions.iter().join(", ")))?; + } + Expression::StringLiteral(s) => f.write_fmt(format_args!("\"{s}\""))?, + } + + Ok(()) + } +} diff --git a/compiler/src/parser/mod.rs b/compiler/src/parser/mod.rs new file mode 100644 index 0000000..250aa20 --- /dev/null +++ b/compiler/src/parser/mod.rs @@ -0,0 +1,6 @@ +pub mod expressions; +pub mod parser; +pub mod tokens; +pub mod types; + +mod tests; diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs new file mode 100644 index 0000000..a2339b2 --- /dev/null +++ b/compiler/src/parser/parser.rs @@ -0,0 +1,771 @@ +use super::{ + expressions::Expression, + tokens::{next_token, Token}, + types::{ + Clause, ExtendedOpcode, LocationSpecifier, Reference, TapeCellLocation, VariableTarget, + VariableTargetReferenceChain, VariableTypeReference, + }, +}; +use crate::{ + backend::{bf::TapeCell, bf2d::TapeCell2D, common::OpcodeVariant}, + macros::macros::{r_assert, r_panic}, + parser::types::VariableTypeDefinition, +}; + +pub fn parse_program( + raw: &str, +) -> Result>, String> { + let program_chars: Vec = raw.chars().collect(); + let mut chars_slice = &program_chars[..]; + let mut clauses = vec![]; + while let Some(clause) = parse_clause(&mut chars_slice)? { + clauses.push(clause); + } + + Ok(clauses) +} + +fn parse_clause( + chars: &mut &[char], +) -> Result>, String> { + let mut s = *chars; + // TODO: decide whether comments should be handled in the parser or not? + Ok(match next_token(&mut s)? 
{ + Token::None => None, + Token::Semicolon => { + *chars = s; + Some(Clause::None) + } + Token::LeftBrace => Some(Clause::Block(parse_block_clauses(chars)?)), + Token::Output => Some(parse_output_clause(chars)?), + Token::Input => Some(parse_input_clause(chars)?), + Token::If => Some(parse_if_else_clause(chars)?), + Token::While => Some(parse_while_clause(chars)?), + Token::Fn => Some(parse_function_definition_clause(chars)?), + Token::Assert => Some(parse_assert_clause(chars)?), + Token::Struct => { + let Token::Name(_) = next_token(&mut s)? else { + // TODO: add source snippet + r_panic!("Expected identifier after `struct` keyword."); + }; + match next_token(&mut s)? { + Token::LeftBrace => Some(parse_struct_definition_clause(chars)?), + _ => Some(parse_let_clause(chars)?), + } + } + Token::Cell => Some(parse_let_clause(chars)?), + Token::Name(_) => match next_token(&mut s)? { + Token::LeftParenthesis => Some(parse_function_call_clause(chars)?), + _ => Some(parse_assign_clause(chars)?), + }, + Token::Drain | Token::Copy => Some(parse_drain_copy_clause(chars)?), + Token::PlusPlus => { + *chars = s; + Some(Clause::AddAssign { + var: parse_var_target(chars)?, + value: Expression::NaturalNumber(1), + self_referencing: false, + }) + } + Token::MinusMinus => { + *chars = s; + Some(Clause::AddAssign { + var: parse_var_target(chars)?, + value: Expression::NaturalNumber((-1i8 as u8) as usize), + self_referencing: false, + }) + } + Token::Bf => Some(parse_brainfuck_clause(chars)?), + token => r_panic!("Invalid starting token `{token}`."), + }) +} + +fn parse_block_clauses( + chars: &mut &[char], +) -> Result>, String> { + let Token::LeftBrace = next_token(chars)? else { + r_panic!("Expected `{{` in code block."); + }; + + let mut clauses = vec![]; + loop { + { + let mut s = *chars; + if let Token::RightBrace = next_token(&mut s)? { + *chars = s; + break; + } + } + let Some(clause) = parse_clause(chars)? else { + r_panic!("Expected clause in code block. 
This should not occur."); + }; + clauses.push(clause); + } + + Ok(clauses) +} + +//////////////////////////// +//////////////////////////// +//////////////////////////// + +impl TapeCellLocation for TapeCell { + fn parse_location_specifier( + chars: &mut &[char], + ) -> Result, String> { + let mut s = *chars; + let Token::At = next_token(&mut s)? else { + return Ok(LocationSpecifier::None); + }; + *chars = s; + + match next_token(&mut s)? { + Token::Minus | Token::Number(_) => Ok(LocationSpecifier::Cell(parse_integer(chars)?)), + // variable location specifier: + Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), + // TODO: add source snippet + token => r_panic!( + "Unexpected `{token}` found while parsing location specifier. (is 2D mode turned on?)" + ), + } + } + + fn to_positive_cell_offset(&self) -> Result { + r_assert!(*self >= 0, "Expected non-negative cell offset."); + Ok(*self as usize) + } +} + +impl TapeCellLocation for TapeCell2D { + fn parse_location_specifier( + chars: &mut &[char], + ) -> Result, String> { + let mut s = *chars; + let Token::At = next_token(&mut s)? else { + return Ok(LocationSpecifier::None); + }; + *chars = s; + + match next_token(&mut s)? { + Token::LeftParenthesis => { + // parse a 2-tuple + let tuple = parse_integer_tuple::<2>(chars)?; + Ok(LocationSpecifier::Cell(TapeCell2D(tuple[0], tuple[1]))) + } + Token::Minus | Token::Number(_) => Ok(LocationSpecifier::Cell(TapeCell2D( + parse_integer(chars)?, + 0, + ))), + // variable location specifier: + Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), + // TODO: add source snippet + token => { + r_panic!("Unexpected `{token}` found while parsing 2D location specifier.") + } + } + } + + fn to_positive_cell_offset(&self) -> Result { + r_assert!( + self.1 == 0 && self.0 >= 0, + "Expected non-negative 1st dimensional cell offset (i.e. (x,y) where y=0)." 
+ ); + Ok(self.0 as usize) + } +} + +fn parse_var_type_definition( + chars: &mut &[char], +) -> Result, String> { + let mut var_type = match next_token(chars)? { + Token::Cell => VariableTypeReference::Cell, + Token::Struct => { + let Token::Name(struct_name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected struct type name in variable definition."); + }; + + VariableTypeReference::Struct(struct_name) + } + token => { + // TODO: add source snippet + r_panic!("Unexpected `{token}` found in variable type definition."); + } + }; + + // parse array specifiers + { + let mut s = *chars; + while let Token::LeftSquareBracket = next_token(&mut s)? { + var_type = VariableTypeReference::Array(Box::new(var_type), parse_subscript(chars)?); + s = chars; + } + } + + let Token::Name(name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected name in variable definition."); + }; + + Ok(VariableTypeDefinition { + var_type, + name, + location_specifier: TC::parse_location_specifier(chars)?, + }) +} + +/// parse the subscript of an array variable, e.g. [4] [6] [0] +/// must be compile-time constant +fn parse_subscript(chars: &mut &[char]) -> Result { + let Token::LeftSquareBracket = next_token(chars)? else { + // TODO: add program snippet + r_panic!("Expected `[` in array subscript."); + }; + let Token::Number(number) = next_token(chars)? else { + // TODO: add program snippet + r_panic!("Expected natural number in array subscript."); + }; + let Token::RightSquareBracket = next_token(chars)? else { + // TODO: add program snippet + r_panic!("Expected `]` in array subscript."); + }; + // TODO: handle errors here + Ok(number) +} + +pub fn parse_var_target(chars: &mut &[char]) -> Result { + let is_spread = { + let mut s = *chars; + if let Token::Asterisk = next_token(&mut s)? { + *chars = s; + true + } else { + false + } + }; + + let Token::Name(base_var_name) = next_token(chars)? 
else { + // TODO: add source snippet + r_panic!("Expected identifier in variable target identifier."); + }; + + let mut ref_chain = vec![]; + loop { + let mut s = *chars; + match next_token(&mut s)? { + Token::LeftSquareBracket => { + let index = parse_subscript(chars)?; + ref_chain.push(Reference::Index(index)); + } + Token::Dot => { + *chars = s; + let Token::Name(subfield_name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected subfield name in variable target identifier."); + }; + ref_chain.push(Reference::NamedField(subfield_name)); + } + _ => break, + } + } + + Ok(VariableTarget { + name: base_var_name, + subfields: if ref_chain.len() > 0 { + Some(VariableTargetReferenceChain(ref_chain)) + } else { + None + }, + is_spread, + }) +} + +fn parse_integer(chars: &mut &[char]) -> Result { + let mut token = next_token(chars)?; + let mut is_negative = false; + if let Token::Minus = token { + is_negative = true; + token = next_token(chars)?; + } + let Token::Number(magnitude) = token else { + // TODO: add source snippet + r_panic!("Expected integer.") + }; + // TODO: handle errors here + Ok(match is_negative { + // TODO: truncation error handling + false => magnitude as i32, + true => -(magnitude as i32), + }) +} + +fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; LENGTH], String> { + let Token::LeftParenthesis = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected opening parenthesis in {LENGTH}-tuple.") + }; + + let mut tuple = [0; LENGTH]; + for (j, element) in tuple.iter_mut().enumerate() { + *element = parse_integer(chars)?; + + if j < LENGTH - 1 { + let Token::Comma = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected comma in {LENGTH}-tuple."); + }; + } + } + let Token::RightParenthesis = next_token(chars)? 
else { + // TODO: add source snippet + r_panic!("Expected closing parenthesis in {LENGTH}-tuple."); + }; + + Ok(tuple) +} + +//////////////////////////// +//////////////////////////// +//////////////////////////// + +fn parse_if_else_clause( + chars: &mut &[char], +) -> Result, String> { + let Token::If = next_token(chars)? else { + // TODO: add program snippet + r_panic!("Expected \"if\" in if-else clause."); + }; + + let is_not = { + let mut s = *chars; + if let Token::Not = next_token(&mut s)? { + *chars = s; + true + } else { + false + } + }; + let condition = Expression::parse(chars)?; + { + let mut s = *chars; + let Token::LeftBrace = next_token(&mut s)? else { + r_panic!("Expected code block in if-else clause."); + }; + } + let block_one = parse_block_clauses(chars)?; + + let block_two = { + let mut s = *chars; + if let Token::Else = next_token(&mut s)? { + *chars = s; + Some(parse_block_clauses(chars)?) + } else { + None + } + }; + + Ok(match (is_not, block_one, block_two) { + (false, if_block, None) => Clause::If { + condition, + if_block, + }, + (true, if_not_block, None) => Clause::IfNot { + condition, + if_not_block, + }, + (false, if_block, Some(else_block)) => Clause::IfElse { + condition, + if_block, + else_block, + }, + (true, if_not_block, Some(else_block)) => Clause::IfNotElse { + condition, + if_not_block, + else_block, + }, + }) +} + +fn parse_while_clause( + chars: &mut &[char], +) -> Result, String> { + let Token::While = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected `while` in while clause."); + }; + + let condition = Expression::parse(chars)?; + // TODO: make while loops support expressions + let Expression::VariableReference(condition_variable) = condition else { + r_panic!("While clause expected variable target condition."); + }; + + { + let mut s = *chars; + let Token::LeftBrace = next_token(&mut s)? 
else { + r_panic!("Expected code block in while clause."); + }; + } + let loop_block = parse_block_clauses(chars)?; + + Ok(Clause::While { + var: condition_variable, + block: loop_block, + }) +} + +fn parse_function_definition_clause( + chars: &mut &[char], +) -> Result, String> { + let Token::Fn = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected `fn` in function definition clause."); + }; + + let Token::Name(function_name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected name in function definition clause."); + }; + + let Token::LeftParenthesis = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected argument list in function definition clause."); + }; + let mut arguments = vec![]; + loop { + { + let mut s = *chars; + if let Token::RightParenthesis = next_token(&mut s)? { + *chars = s; + break; + } + } + arguments.push(parse_var_type_definition(chars)?); + + match next_token(chars)? { + Token::RightParenthesis => break, + Token::Comma => (), + // TODO: add source snippet + _ => r_panic!("Unexpected token in function definition arguments."), + } + } + + Ok(Clause::DefineFunction { + name: function_name, + arguments, + block: parse_block_clauses(chars)?, + }) +} + +fn parse_function_call_clause(chars: &mut &[char]) -> Result, String> { + let Token::Name(function_name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected function name in function call clause."); + }; + + let Token::LeftParenthesis = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected `(` in function call clause."); + }; + + let mut arguments = vec![]; + loop { + { + let mut s = *chars; + if let Token::RightParenthesis = next_token(&mut s)? { + *chars = s; + break; + } + } + arguments.push(Expression::parse(chars)?); + + match next_token(chars)? 
{ + Token::RightParenthesis => break, + Token::Comma => (), + // TODO: add source snippet + _ => r_panic!("Unexpected token in function call arguments."), + } + } + + Ok(Clause::CallFunction { + function_name, + arguments, + }) +} + +/// Parse tokens representing a struct definition into a clause +fn parse_struct_definition_clause( + chars: &mut &[char], +) -> Result, String> { + let Token::Struct = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected `struct` in struct definition."); + }; + + let Token::Name(name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected name in struct definition."); + }; + + let Token::LeftBrace = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected `{{` in struct clause."); + }; + + let mut fields = vec![]; + loop { + let field = parse_var_type_definition::(chars)?; + fields.push(field.try_into()?); + let Token::Semicolon = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected semicolon after struct definition field."); + }; + { + let mut s = *chars; + if let Token::RightBrace = next_token(&mut s)? { + *chars = s; + break; + } + } + } + + Ok(Clause::DefineStruct { name, fields }) +} + +/// parse variable declarations and definitions. +/// e.g. `cell x = 0;` or `struct DummyStruct y;` +fn parse_let_clause(chars: &mut &[char]) -> Result, String> { + let var = parse_var_type_definition(chars)?; + + let mut s = *chars; + if let Token::EqualsSign = next_token(&mut s)? { + *chars = s; + let expr = Expression::parse(chars)?; + let Token::Semicolon = next_token(chars)? else { + r_panic!("Expected semicolon after variable definition."); + }; + return Ok(Clause::DefineVariable { var, value: expr }); + } + let Token::Semicolon = next_token(chars)? 
else { + r_panic!("Expected semicolon after variable declaration."); + }; + Ok(Clause::DeclareVariable { var }) +} + +fn parse_output_clause(chars: &mut &[char]) -> Result, String> { + let Token::Output = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected keyword `output` in output clause."); + }; + + let expr = Expression::parse(chars)?; + + let Token::Semicolon = next_token(chars)? else { + r_panic!("Expected semicolon at end of output clause."); + }; + + Ok(Clause::Output { value: expr }) +} + +fn parse_input_clause(chars: &mut &[char]) -> Result, String> { + let Token::Input = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected keyword `input` in input clause."); + }; + + let var = parse_var_target(chars)?; + + let Token::Semicolon = next_token(chars)? else { + r_panic!("Expected semicolon at end of input clause."); + }; + + Ok(Clause::Input { var }) +} + +fn parse_assign_clause(chars: &mut &[char]) -> Result, String> { + let var = parse_var_target(chars)?; + + let operator = next_token(chars)?; + match operator { + Token::EqualsSign | Token::PlusEquals | Token::MinusEquals => (), + token => r_panic!("Unexpected `{token}` in assignment clause."), + } + + let expr = Expression::parse(chars)?; + + // TODO: refactor this, at the very least make it nuanced per-cell, as this was added before subfields were added + let self_referencing = expr.check_self_referencing(&var); + + let Token::Semicolon = next_token(chars)? 
else { + r_panic!( + "Expected semicolon at end of {} clause.", + match operator { + Token::EqualsSign => "assignment", + Token::PlusEquals => "addition-assignment", + Token::MinusEquals => "subtraction-assignment", + _ => unreachable!(), + } + ); + }; + + Ok(match operator { + Token::EqualsSign => Clause::Assign { + var, + value: expr, + self_referencing, + }, + Token::PlusEquals => Clause::AddAssign { + var, + value: expr, + self_referencing, + }, + Token::MinusEquals => Clause::AddAssign { + var, + value: expr.flipped_sign()?, + self_referencing, + }, + _ => unreachable!(), + }) +} + +/// parse a drain/copy loop: +/// `drain g {i += 1;};` +/// `drain g into j;` +/// `copy foo into bar {g += 2; etc;};` +fn parse_drain_copy_clause( + chars: &mut &[char], +) -> Result, String> { + let is_copying = match next_token(chars)? { + Token::Copy => true, + Token::Drain => false, + token => r_panic!("Unexpected `{token}` in drain/copy clause."), + }; + + let source = Expression::parse(chars)?; + + let mut targets = Vec::new(); + { + let mut s = *chars; + if let Token::Into = next_token(&mut s)? { + *chars = s; + loop { + // parse var target before delimiters because into must precede a target + targets.push(parse_var_target(chars)?); + { + let mut s = *chars; + if let Token::LeftBrace | Token::Semicolon = next_token(&mut s)? { + break; + } + } + } + } + } + + let block = { + let mut s = *chars; + match next_token(&mut s)? { + Token::LeftBrace => Some(parse_block_clauses(chars)?), + Token::Semicolon => { + *chars = s; + None + } + token => r_panic!("Unexpected `{token}` in drain/copy clause."), + } + }; + + Ok(Clause::DrainLoop { + source, + targets, + block, + is_copying, + }) +} + +fn parse_assert_clause(chars: &mut &[char]) -> Result, String> { + let Token::Assert = next_token(chars)? else { + r_panic!("Expected `assert` in assert clause."); + }; + + let var = parse_var_target(chars)?; + + let value = match next_token(chars)? 
{ + Token::Unknown => None, + Token::Equals => Some(Expression::parse(chars)?), + token => r_panic!("Unexpected `{token}` in assert clause."), + }; + + let Token::Semicolon = next_token(chars)? else { + r_panic!("Expected semicolon at end of assert clause."); + }; + + Ok(Clause::AssertVariableValue { var, value }) +} + +fn parse_brainfuck_clause( + chars: &mut &[char], +) -> Result, String> { + let Token::Bf = next_token(chars)? else { + r_panic!("Expected `bf` in in-line Brainfuck clause."); + }; + + let location_specifier = TC::parse_location_specifier(chars)?; + let mut clobbered_variables = vec![]; + { + let mut s = *chars; + // parse the rare `clobbers` keyword, borrowed from GCC I think? // TODO: look this up + if let Token::Clobbers = next_token(&mut s)? { + *chars = s; + loop { + clobbered_variables.push(parse_var_target(chars)?); + { + let mut s = *chars; + if let Token::LeftBrace = next_token(&mut s)? { + break; + } + } + } + } + } + + let Token::LeftBrace = next_token(chars)? 
else { + r_panic!("Expected `{{` in in-line Brainfuck clause."); + }; + + // tokenise and parse in-line brainfuck: + // totally different tokenisation to mastermind + let mut operations = vec![]; + loop { + match chars.get(0) { + Some(c) => match OC::try_from_char(*c) { + Some(opcode) => { + *chars = &chars[1..]; + operations.push(ExtendedOpcode::Opcode(opcode)); + } + None => match c { + '{' => { + // recursively parse inner mastermind block + operations.push(ExtendedOpcode::Block(parse_block_clauses(chars)?)); + } + '}' => { + *chars = &chars[1..]; + break; + } + c if c.is_whitespace() => { + *chars = &chars[1..]; + } + c => r_panic!("Unexpected character `{c}` in Brainfuck clause."), + }, + }, + None => { + // TODO: add source snippet + r_panic!("Unexpected end of file in Brainfuck clause."); + } + } + } + + Ok(Clause::Brainfuck { + location_specifier, + clobbered_variables, + operations, + }) +} diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs new file mode 100644 index 0000000..f3d4074 --- /dev/null +++ b/compiler/src/parser/tests.rs @@ -0,0 +1,658 @@ +#[cfg(test)] +mod parser_tests { + use super::super::{ + expressions::{Expression, Sign}, + parser::parse_program, + types::{ + Clause, ExtendedOpcode, LocationSpecifier, VariableTarget, VariableTypeDefinition, + VariableTypeReference, + }, + }; + use crate::backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, + }; + + fn _parser_test(raw: &str, expected: &[Clause]) { + assert_eq!(parse_program(raw).unwrap(), expected); + } + + fn _parser_test_2d(raw: &str, expected: &[Clause]) { + assert_eq!(parse_program(raw).unwrap(), expected); + } + + #[test] + fn parse_if_1() { + _parser_test( + "if true {{}}", + &[Clause::If { + condition: Expression::NaturalNumber(1), + if_block: vec![Clause::::Block(vec![])], + }], + ); + } + + #[test] + fn end_tokens_1() { + assert_eq!( + parse_program::("clobbers").unwrap_err(), + "Invalid starting token `clobbers`." 
+ ); + } + + #[test] + fn end_tokens_2() { + assert_eq!( + parse_program::("cell;").unwrap_err(), + "Expected name in variable definition." + ) + } + + #[test] + fn while_condition_1() { + _parser_test( + "while x {{}}", + &[Clause::While { + var: VariableTarget { + name: String::from("x"), + subfields: None, + is_spread: false, + }, + block: vec![Clause::Block(vec![])], + }], + ); + } + + #[test] + fn two_dimensional_1() { + assert_eq!( + parse_program::("cell x @(0, 1);").unwrap_err(), + "Unexpected `(` found while parsing location specifier. (is 2D mode turned on?)" + ); + } + + #[test] + fn two_dimensional_2() { + _parser_test_2d( + "cell x @(0, 1);", + &[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("x"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(0, 1)), + }, + }], + ); + } + + #[test] + fn two_dimensional_3() { + _parser_test_2d( + "cell xyz @(-10, -101);", + &[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("xyz"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(-10, -101)), + }, + }], + ); + } + + #[test] + fn var_v_1d() { + _parser_test( + "cell v;", + &[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }, + }], + ); + } + + #[test] + fn var_v_2d() { + _parser_test_2d( + "cell v;", + &[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }, + }], + ); + } + + #[test] + fn inline_bf_1() { + _parser_test( + "cell v; bf {+{cell v;}-}", + &[ + Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }, + }, + Clause::Brainfuck { + 
location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode::Add), + ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }, + }]), + ExtendedOpcode::Opcode(Opcode::Subtract), + ], + }, + ], + ); + } + + #[test] + fn inline_bf_2() { + _parser_test_2d( + "cell v; bf {v{cell v;}^}", + &[ + Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }, + }, + Clause::Brainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }, + }]), + ExtendedOpcode::Opcode(Opcode2D::Up), + ], + }, + ], + ) + } + + #[test] + fn inline_bf_3() { + _parser_test_2d( + "bf {vvvv>}", + &[Clause::Brainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Right), + ], + }], + ) + } + + #[test] + fn inline_bf_4() { + assert_eq!( + parse_program::("bf {vvvv>}").unwrap_err(), + "Unexpected character `v` in Brainfuck clause." 
+ ); + } + + #[test] + fn strings_1() { + _parser_test( + r#" +cell[5] ggghh = "hello"; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 5, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("hello")), + }], + ); + } + + #[test] + fn strings_1a() { + _parser_test( + r#" +cell[0] ggghh = ""; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 0, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("")), + }], + ); + } + + #[test] + fn strings_1b() { + _parser_test( + r#" +cell[1] ggghh = "hello"; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 1, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("hello")), + }], + ); + } + + #[test] + fn strings_2() { + _parser_test( + r#" +cell[6] ggghh = "hel'lo"; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 6, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("hel'lo")), + }], + ); + } + + #[test] + fn strings_3() { + _parser_test( + r#" +cell[7] ggghh = "\"hello\""; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 7, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("\"hello\"")), + }], + ); + } + + #[test] + 
fn arrays_1() { + _parser_test( + r#" +cell[0] ggghh = []; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 0, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::ArrayLiteral(vec![]), + }], + ); + } + + #[test] + fn arrays_2() { + _parser_test( + r#" +cell[333] arr = [45, 53]; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 333, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(53), + ]), + }], + ); + } + + #[test] + fn arrays_2a() { + _parser_test( + r#" +cell[333] arr = [45 + 123, 53]; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 333, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::ArrayLiteral(vec![ + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(123), + ], + }, + Expression::NaturalNumber(53), + ]), + }], + ); + } + + #[test] + fn arrays_2b() { + _parser_test( + r#" +cell[333] arr = [45 + 123, -(53 + 0+78-9)]; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 333, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::ArrayLiteral(vec![ + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(123), + ], + }, + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + 
Expression::NaturalNumber(53), + Expression::NaturalNumber(0), + Expression::NaturalNumber(78), + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(9)], + }, + ], + }, + ]), + }], + ); + } + + #[test] + fn arrays_3() { + _parser_test( + r#" +cell[3] arr = ['h', 53, (((4)))]; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 3, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(104), + Expression::NaturalNumber(53), + Expression::NaturalNumber(4), + ]), + }], + ); + } + + #[test] + fn arrays_4() { + _parser_test( + r#" +struct nonsense[39] arr @-56 = ["hello!", 53, [4,5,6]]; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Struct(String::from("nonsense"))), + 39, + ), + location_specifier: LocationSpecifier::Cell(-56), + }, + value: Expression::ArrayLiteral(vec![ + Expression::StringLiteral(String::from("hello!")), + Expression::NaturalNumber(53), + Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(4), + Expression::NaturalNumber(5), + Expression::NaturalNumber(6), + ]), + ]), + }], + ); + } + + #[test] + fn arrays_5() { + _parser_test( + r#" +struct nonsense[39] arr @-56 = ["hello!", ',', [4,"hello comma: ,",6]]; +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Struct(String::from("nonsense"))), + 39, + ), + location_specifier: LocationSpecifier::Cell(-56), + }, + value: Expression::ArrayLiteral(vec![ + Expression::StringLiteral(String::from("hello!")), + Expression::NaturalNumber(44), + Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(4), + 
Expression::StringLiteral(String::from("hello comma: ,")), + Expression::NaturalNumber(6), + ]), + ]), + }], + ); + } + + #[test] + fn sums_1() { + _parser_test( + r#" +struct nonsense[39] arr @-56 = 56 - ( 4+3+( -7-5 +(6)-(((( (0) )))) ) ); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Struct(String::from("nonsense"))), + 39, + ), + location_specifier: LocationSpecifier::Cell(-56), + }, + value: Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(56), + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(4), + Expression::NaturalNumber(3), + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(7)], + }, + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(5)], + }, + Expression::NaturalNumber(6), + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(0)], + }, + ], + }, + ], + }, + ], + }, + }], + ); + } + + #[test] + fn empty_clauses_1() { + _parser_test(";", &[Clause::None]); + } + + #[test] + fn empty_clauses_1a() { + _parser_test("; ", &[Clause::None]); + _parser_test(";\n", &[Clause::None]); + _parser_test("\n;\n;\n", &[Clause::None, Clause::None]); + } + + #[test] + fn empty_clauses_2() { + _parser_test( + " ; ;{;output 3 ; ;} ; ; ", + &[ + Clause::None, + Clause::None, + Clause::Block(vec![ + Clause::None, + Clause::Output { + value: Expression::NaturalNumber(3), + }, + Clause::None, + ]), + Clause::None, + Clause::None, + ], + ); + } + + #[test] + fn blocks_1() { + _parser_test("{}", &[Clause::Block(vec![])]); + _parser_test( + ";;{;;};;", + &[ + Clause::None, + Clause::None, + Clause::Block(vec![Clause::None, Clause::None]), + Clause::None, + 
Clause::None, + ], + ); + } + + #[test] + fn blocks_1a() { + _parser_test( + " {}{} {} {} ", + &[ + Clause::Block(vec![]), + Clause::Block(vec![]), + Clause::Block(vec![]), + Clause::Block(vec![]), + ], + ); + } + + #[test] + fn blocks_1b() { + _parser_test( + " {}{{{{}}{}}} {} {} ", + &[ + Clause::Block(vec![]), + Clause::Block(vec![Clause::Block(vec![ + Clause::Block(vec![Clause::Block(vec![])]), + Clause::Block(vec![]), + ])]), + Clause::Block(vec![]), + Clause::Block(vec![]), + ], + ); + } + + #[test] + fn blocks_2() { + _parser_test( + "{output 1;output 2;}{{{} output 3;}}", + &[ + Clause::Block(vec![ + Clause::Output { + value: Expression::NaturalNumber(1), + }, + Clause::Output { + value: Expression::NaturalNumber(2), + }, + ]), + Clause::Block(vec![Clause::Block(vec![ + Clause::Block(vec![]), + Clause::Output { + value: Expression::NaturalNumber(3), + }, + ])]), + ], + ); + } +} diff --git a/compiler/src/parser/tokens.rs b/compiler/src/parser/tokens.rs new file mode 100644 index 0000000..a99252e --- /dev/null +++ b/compiler/src/parser/tokens.rs @@ -0,0 +1,1072 @@ +// TODO: make an impl for a tokeniser, inverse-builder pattern? +// have a function to peek, then accept changes, so we don't double hangle tokens + +use crate::macros::macros::r_panic; + +/// Get the next token from chars, advance the passed in pointer +pub fn next_token(chars: &mut &[char]) -> Result { + // skip any whitespace + loop { + match chars.get(0) { + Some(c) => { + if !c.is_whitespace() { + break; + } + } + None => break, + } + *chars = &chars[1..]; + } + + // read the first character and branch from there + let Some(c) = chars.get(0) else { + return Ok(Token::None); + }; + Ok(match *c { + c @ (';' | '{' | '}' | '(' | ')' | '[' | ']' | '.' 
| ',' | '*' | '@' | '=' | '+' | '-') => { + *chars = &chars[1..]; + match c { + ';' => Token::Semicolon, + '{' => Token::LeftBrace, + '}' => Token::RightBrace, + '(' => Token::LeftParenthesis, + ')' => Token::RightParenthesis, + '[' => Token::LeftSquareBracket, + ']' => Token::RightSquareBracket, + '.' => Token::Dot, + ',' => Token::Comma, + '*' => Token::Asterisk, + '@' => Token::At, + '=' => Token::EqualsSign, + '+' => match chars.get(0) { + Some('+') => { + *chars = &chars[1..]; + Token::PlusPlus + } + Some('=') => { + *chars = &chars[1..]; + Token::PlusEquals + } + _ => Token::Plus, + }, + '-' => match chars.get(0) { + Some('-') => { + *chars = &chars[1..]; + Token::MinusMinus + } + Some('=') => { + *chars = &chars[1..]; + Token::MinusEquals + } + _ => Token::Minus, + }, + _ => unreachable!(), + } + } + '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => { + Token::Number(parse_number(chars)?) + } + 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' + | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' + | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' + | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' => { + let word = parse_word(chars)?; + match word.as_str() { + "output" => Token::Output, + "input" => Token::Input, + "fn" => Token::Fn, + "cell" => Token::Cell, + "struct" => Token::Struct, + "while" => Token::While, + "if" => Token::If, + "not" => Token::Not, + "else" => Token::Else, + "copy" => Token::Copy, + "drain" => Token::Drain, + "into" => Token::Into, + "bf" => Token::Bf, + "clobbers" => Token::Clobbers, + "assert" => Token::Assert, + "equals" => Token::Equals, + "unknown" => Token::Unknown, + "true" => Token::True, + "false" => Token::False, + _ => Token::Name(word), + } + } + '\'' => Token::Character(parse_character_literal(chars)?), + '"' => Token::String(parse_string_literal(chars)?), + _ => r_panic!("Invalid token found: 
`{c}`."), + }) +} + +fn parse_number(chars: &mut &[char]) -> Result { + // parse hexadecimal and binary + // if let Some('0') = chars.get(0) { + // match chars.get(1) { + // // Some('x') => { + // // let mut i = 2; + // // } + // // Some('b') => { + // // let mut i = 2; + // // } + // _ => (), + // } + // } + + // parse decimal natural number + let mut i = 0; + let mut n = 0; + loop { + let Some(digit) = chars.get(i) else { + break; + }; + match digit { + c @ ('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => { + n *= 10; + n += (*c as usize) - ('0' as usize); + } + // '_' => { + // // TODO: support underscores in number literals? + // } + ';' | '{' | '}' | '(' | ')' | '[' | ']' | '.' | ',' | '*' | '@' | '+' | '-' => break, + c if c.is_whitespace() => break, + 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' + | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' + | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' + | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' => { + // TODO: add source snippet + r_panic!("Unexpected word character in number token.") + } + // TODO: add source snippet + _ => r_panic!("Unknown character found while parsing number token."), + } + i += 1; + } + + // update used characters + assert!(i <= chars.len()); + *chars = &chars[i..]; + + Ok(n) +} + +fn parse_word(chars: &mut &[char]) -> Result { + let mut i = 0; + let mut parsed_word = String::new(); + + { + let Some( + c @ ('a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' + | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' + | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' + | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_'), + ) = chars.get(i) + else { + r_panic!("Expected non-numeral character at start of word."); + }; + parsed_word.push(*c); + i += 
1; + } + + while let Some( + c @ ('a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' + | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' + | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' + | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' | '0' | '1' | '2' + | '3' | '4' | '5' | '6' | '7' | '8' | '9'), + ) = chars.get(i) + { + parsed_word.push(*c); + i += 1; + } + + // update used characters + assert!(i <= chars.len()); + *chars = &chars[i..]; + + Ok(parsed_word) +} + +/// handle character escape sequences, supports Rust ASCII escapes +fn parse_character_literal(chars: &mut &[char]) -> Result { + let mut i = 0; + let Some('\'') = chars.get(i) else { + r_panic!("Expected `'` at start of character literal."); + }; + i += 1; + let c = match chars.get(i) { + Some('\\') => { + i += 1; + let c = match chars.get(i) { + Some(c) => match c { + '\'' => '\'', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '0' => '\0', + // TODO: add source snippet + _ => r_panic!("Invalid escape sequence in character literal."), + }, + None => r_panic!("Expected escape sequence in character literal."), + }; + c + } + Some('\'') => r_panic!("Unexpected `'` in character literal, must be length 1."), + Some(c) => *c, + None => r_panic!("Unexpected end of file while parsing character literal."), + }; + i += 1; + let Some('\'') = chars.get(i) else { + r_panic!("Expected `'` at end of character literal. 
Character literals must be length 1."); + }; + i += 1; + + // update used characters + assert!(i <= chars.len()); + *chars = &chars[i..]; + + Ok(c) +} + +/// handle string escape sequences, supports Rust ASCII escapes +fn parse_string_literal(chars: &mut &[char]) -> Result { + let mut parsed_string = String::new(); + let mut i = 0; + let Some('"') = chars.get(i) else { + r_panic!("Expected `\"` at start of string literal."); + }; + i += 1; + loop { + match chars.get(i) { + None => r_panic!("Unexpected end of input in string literal."), + Some('\\') => { + i += 1; + parsed_string.push(match chars.get(i) { + Some('\"') => '"', + Some('n') => '\n', + Some('r') => '\r', + Some('t') => '\t', + Some('\\') => '\\', + Some('0') => '\0', + // TODO: add source snippet + _ => r_panic!("Invalid escape sequence in string literal."), + }); + } + Some('"') => break, + Some(c) => parsed_string.push(*c), + } + i += 1; + } + + let Some('"') = chars.get(i) else { + r_panic!("Expected `\"` at end of string literal."); + }; + i += 1; + + // update used characters + assert!(i <= chars.len()); + *chars = &chars[i..]; + + Ok(parsed_string) +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Token { + None, + Output, + Input, + Fn, + Cell, + Struct, + While, + If, + Not, + Else, + Copy, + Drain, + Into, + Bf, + Clobbers, + Assert, + Equals, + Unknown, + True, + False, + LeftBrace, + RightBrace, + LeftSquareBracket, + RightSquareBracket, + LeftParenthesis, + RightParenthesis, + Comma, + Dot, + Asterisk, + At, + Plus, + Minus, + EqualsSign, + Semicolon, + PlusPlus, + MinusMinus, + PlusEquals, + MinusEquals, + Name(String), + Number(usize), + String(String), + Character(char), +} + +impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Token::Output + | Token::Input + | Token::Fn + | Token::Cell + | Token::Struct + | Token::While + | Token::If + | Token::Not + | Token::Else + | Token::Copy + | Token::Drain + | Token::Into + | 
Token::Bf + | Token::Clobbers + | Token::Assert + | Token::Equals + | Token::Unknown + | Token::True + | Token::False + | Token::LeftBrace + | Token::RightBrace + | Token::LeftSquareBracket + | Token::RightSquareBracket + | Token::LeftParenthesis + | Token::RightParenthesis + | Token::Comma + | Token::Dot + | Token::Asterisk + | Token::At + | Token::Plus + | Token::Minus + | Token::EqualsSign + | Token::Semicolon + | Token::PlusPlus + | Token::MinusMinus + | Token::PlusEquals + | Token::MinusEquals => f.write_str(match self { + Token::Output => "output", + Token::Input => "input", + Token::Fn => "fn", + Token::Cell => "cell", + Token::Struct => "struct", + Token::While => "while", + Token::If => "if", + Token::Not => "not", + Token::Else => "else", + Token::Copy => "copy", + Token::Drain => "drain", + Token::Into => "into", + Token::Bf => "bf", + Token::Clobbers => "clobbers", + Token::Assert => "assert", + Token::Equals => "equals", + Token::Unknown => "unknown", + Token::True => "true", + Token::False => "false", + Token::LeftBrace => "{", + Token::RightBrace => "}", + Token::LeftSquareBracket => "[", + Token::RightSquareBracket => "]", + Token::LeftParenthesis => "(", + Token::RightParenthesis => ")", + Token::Comma => ",", + Token::Dot => ".", + Token::Asterisk => "*", + Token::At => "@", + Token::Plus => "+", + Token::Minus => "-", + Token::EqualsSign => "=", + Token::Semicolon => ";", + Token::PlusPlus => "++", + Token::MinusMinus => "--", + Token::PlusEquals => "+=", + Token::MinusEquals => "-=", + _ => unreachable!(), + }), + Token::Name(name) => f.write_str(name), + Token::Number(number) => f.write_fmt(format_args!("{number}")), + Token::String(s) => f.write_fmt(format_args!("\"{s}\"")), + Token::Character(c) => f.write_fmt(format_args!("'{c}'")), + Token::None => Ok(()), + } + } +} + +#[cfg(test)] +mod tokeniser_tests { + use super::*; + + fn tokenise(input_str: &str) -> Result, String> { + let chars_vec: Vec = input_str.chars().collect(); + let mut 
chars_slice = &chars_vec[..]; + let mut tokens = vec![]; + loop { + let token = next_token(&mut chars_slice)?; + if let Token::None = token { + break; + } + tokens.push(token); + } + Ok(tokens) + } + + fn _tokenisation_test(input_str: &str, desired_output: &[Token]) { + let actual_output = tokenise(input_str).unwrap(); + println!("desired: {desired_output:#?}"); + println!("actual: {actual_output:#?}"); + assert!(actual_output.iter().eq(desired_output)); + } + + #[test] + fn empty_1() { + _tokenisation_test("", &[]); + } + + #[test] + fn empty_1a() { + _tokenisation_test(" \n \t ", &[]); + } + + #[test] + fn empty_2() { + let chars_vec: Vec = "".chars().collect(); + let mut chars_slice = &chars_vec[..]; + assert_eq!(next_token(&mut chars_slice).unwrap(), Token::None); + } + + #[test] + fn empty_2a() { + let chars_vec: Vec = "\n \t \n ".chars().collect(); + let mut chars_slice = &chars_vec[..]; + assert_eq!(next_token(&mut chars_slice).unwrap(), Token::None); + } + + #[test] + fn single() { + let desired_output = [ + Token::EqualsSign, + Token::EqualsSign, + Token::Semicolon, + Token::Semicolon, + Token::Asterisk, + Token::Asterisk, + Token::At, + Token::At, + Token::LeftSquareBracket, + Token::LeftSquareBracket, + Token::LeftBrace, + Token::LeftBrace, + Token::LeftParenthesis, + Token::LeftParenthesis, + Token::RightSquareBracket, + Token::RightSquareBracket, + Token::RightBrace, + Token::RightBrace, + Token::RightParenthesis, + Token::RightParenthesis, + Token::Dot, + Token::Dot, + Token::Comma, + Token::Comma, + ]; + _tokenisation_test("==;;**@@[[{{((]]}}))..,,", &desired_output); + _tokenisation_test(" == ; ;**@ @[[ {{ ( (] ]}} )). 
., ,", &desired_output); + } + + #[test] + fn double_1() { + _tokenisation_test( + "+=+=-=-=++++----", + &[ + Token::PlusEquals, + Token::PlusEquals, + Token::MinusEquals, + Token::MinusEquals, + Token::PlusPlus, + Token::PlusPlus, + Token::MinusMinus, + Token::MinusMinus, + ], + ); + } + + #[test] + fn double_1a() { + _tokenisation_test( + "+ =+ = -= -=+ +++ - - --", + &[ + Token::Plus, + Token::EqualsSign, + Token::Plus, + Token::EqualsSign, + Token::MinusEquals, + Token::MinusEquals, + Token::Plus, + Token::PlusPlus, + Token::Plus, + Token::Minus, + Token::Minus, + Token::MinusMinus, + ], + ); + } + + #[test] + fn double_2() { + _tokenisation_test( + "-++=+++=+-=--=---=-+++++-+-----", + &[ + Token::Minus, + Token::PlusPlus, + Token::EqualsSign, + Token::PlusPlus, + Token::PlusEquals, + Token::Plus, + Token::MinusEquals, + Token::MinusMinus, + Token::EqualsSign, + Token::MinusMinus, + Token::MinusEquals, + Token::Minus, + Token::PlusPlus, + Token::PlusPlus, + Token::Plus, + Token::Minus, + Token::Plus, + Token::MinusMinus, + Token::MinusMinus, + Token::Minus, + ], + ); + } + + #[test] + fn double_2a() { + _tokenisation_test( + "-+ +=+ ++=+-=-- =-- - =-+ +++ +-+-- - --", + &[ + Token::Minus, + Token::Plus, + Token::PlusEquals, + Token::Plus, + Token::PlusPlus, + Token::EqualsSign, + Token::Plus, + Token::MinusEquals, + Token::MinusMinus, + Token::EqualsSign, + Token::MinusMinus, + Token::Minus, + Token::EqualsSign, + Token::Minus, + Token::Plus, + Token::PlusPlus, + Token::Plus, + Token::Plus, + Token::Minus, + Token::Plus, + Token::MinusMinus, + Token::Minus, + Token::MinusMinus, + ], + ); + } + + #[test] + fn single_and_double() { + _tokenisation_test( + "=+==;+=- =;*---=++*@@[[{{+ +((]--]}+-+})).---.-,,", + &[ + Token::EqualsSign, + Token::PlusEquals, + Token::EqualsSign, + Token::Semicolon, + Token::PlusEquals, + Token::Minus, + Token::EqualsSign, + Token::Semicolon, + Token::Asterisk, + Token::MinusMinus, + Token::MinusEquals, + Token::PlusPlus, + 
Token::Asterisk, + Token::At, + Token::At, + Token::LeftSquareBracket, + Token::LeftSquareBracket, + Token::LeftBrace, + Token::LeftBrace, + Token::Plus, + Token::Plus, + Token::LeftParenthesis, + Token::LeftParenthesis, + Token::RightSquareBracket, + Token::MinusMinus, + Token::RightSquareBracket, + Token::RightBrace, + Token::Plus, + Token::Minus, + Token::Plus, + Token::RightBrace, + Token::RightParenthesis, + Token::RightParenthesis, + Token::Dot, + Token::MinusMinus, + Token::Minus, + Token::Dot, + Token::Minus, + Token::Comma, + Token::Comma, + ], + ); + } + + #[test] + fn keywords() { + _tokenisation_test( + r#" +output output input input fn fn cell cell struct struct while while if +if not not else else copy copy drain drain into into bf bf clobbers clobbers + assert assert equals equals unknown unknown true true false false +"#, + &[ + Token::Output, + Token::Output, + Token::Input, + Token::Input, + Token::Fn, + Token::Fn, + Token::Cell, + Token::Cell, + Token::Struct, + Token::Struct, + Token::While, + Token::While, + Token::If, + Token::If, + Token::Not, + Token::Not, + Token::Else, + Token::Else, + Token::Copy, + Token::Copy, + Token::Drain, + Token::Drain, + Token::Into, + Token::Into, + Token::Bf, + Token::Bf, + Token::Clobbers, + Token::Clobbers, + Token::Assert, + Token::Assert, + Token::Equals, + Token::Equals, + Token::Unknown, + Token::Unknown, + Token::True, + Token::True, + Token::False, + Token::False, + ], + ); + } + + #[test] + fn keywords_and_simples() { + _tokenisation_test( + r#"unknown,assert,equals.into;struct)clobbers-- -+input+++not(else{ +if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, + &[ + Token::Unknown, + Token::Comma, + Token::Assert, + Token::Comma, + Token::Equals, + Token::Dot, + Token::Into, + Token::Semicolon, + Token::Struct, + Token::RightParenthesis, + Token::Clobbers, + Token::MinusMinus, + Token::Minus, + Token::Plus, + Token::Input, + Token::PlusPlus, + Token::Plus, + Token::Not, + 
Token::LeftParenthesis, + Token::Else, + Token::LeftBrace, + Token::If, + Token::Fn, + Token::LeftBrace, + Token::Output, + Token::RightParenthesis, + Token::True, + Token::RightParenthesis, + Token::False, + Token::Minus, + Token::While, + Token::Asterisk, + Token::At, + Token::Copy, + Token::At, + Token::PlusEquals, + Token::At, + Token::Drain, + Token::MinusEquals, + Token::Into, + Token::EqualsSign, + Token::RightSquareBracket, + Token::LeftSquareBracket, + Token::Bf, + Token::Dot, + Token::Cell, + ], + ); + } + + #[test] + fn names_1() { + _tokenisation_test("i", &[Token::Name(String::from("i"))]); + } + + #[test] + fn names_1a() { + _tokenisation_test("_", &[Token::Name(String::from("_"))]); + } + + #[test] + fn names_2() { + _tokenisation_test( + "while hello", + &[Token::While, Token::Name(String::from("hello"))], + ); + } + + #[test] + fn names_2a() { + _tokenisation_test("while_", &[Token::Name(String::from("while_"))]); + } + + #[test] + fn names_2b() { + _tokenisation_test( + "if_else_while_hello;welcome\ninto the if club", + &[ + Token::Name(String::from("if_else_while_hello")), + Token::Semicolon, + Token::Name(String::from("welcome")), + Token::Into, + Token::Name(String::from("the")), + Token::If, + Token::Name(String::from("club")), + ], + ); + } + + #[test] + fn names_2c() { + _tokenisation_test( + "hello{If;elSe ___if}\n\n\nclobberss", + &[ + Token::Name(String::from("hello")), + Token::LeftBrace, + Token::Name(String::from("If")), + Token::Semicolon, + Token::Name(String::from("elSe")), + Token::Name(String::from("___if")), + Token::RightBrace, + Token::Name(String::from("clobberss")), + ], + ); + } + + #[test] + fn names_2d() { + _tokenisation_test( + "hello while you were gone I", + &[ + Token::Name(String::from("hello")), + Token::While, + Token::Name(String::from("you")), + Token::Name(String::from("were")), + Token::Name(String::from("gone")), + Token::Name(String::from("I")), + ], + ); + } + + #[test] + fn character_literals_1() { + 
_tokenisation_test( + r#"'a' 'b' 'c' ' '"#, + &[ + Token::Character('a'), + Token::Character('b'), + Token::Character('c'), + Token::Character(' '), + ], + ); + } + + #[test] + fn character_literals_2() { + _tokenisation_test(r#"'\n'"#, &[Token::Character('\n')]); + } + + #[test] + fn character_literals_3() { + _tokenisation_test(r#"'"'"#, &[Token::Character('"')]); + } + + #[test] + fn character_literals_4() { + _tokenisation_test(r#"'\''"#, &[Token::Character('\'')]); + } + + #[test] + fn character_literals_5() { + assert_eq!( + tokenise(r#"'\'"#).unwrap_err(), + "Expected `'` at end of character literal. Character literals must be length 1." + ); + } + + #[test] + fn character_literals_6() { + assert_eq!( + tokenise(r#"'aa'"#).unwrap_err(), + "Expected `'` at end of character literal. Character literals must be length 1." + ); + } + + #[test] + fn character_literals_7() { + assert_eq!( + tokenise(r#"''"#).unwrap_err(), + "Unexpected `'` in character literal, must be length 1." + ); + } + + #[test] + fn string_literals_1() { + _tokenisation_test("\"hello\"", &[Token::String(String::from("hello"))]); + } + + #[test] + fn string_literals_2() { + _tokenisation_test(r#""""#, &[Token::String(String::from(""))]); + } + + #[test] + fn string_literals_2a() { + _tokenisation_test( + r#""""""#, + &[ + Token::String(String::from("")), + Token::String(String::from("")), + ], + ); + } + + #[test] + fn string_literals_3() { + _tokenisation_test( + r#""\"" " ""#, + &[ + Token::String(String::from("\"")), + Token::String(String::from(" ")), + ], + ); + } + + #[test] + fn numbers_dec_1() { + _tokenisation_test( + "1 123 000098763", + &[ + Token::Number(1), + Token::Number(123), + Token::Number(000098763), + ], + ); + } + + #[test] + fn numbers_dec_2() { + _tokenisation_test( + ".0654 567.32", + &[ + Token::Dot, + Token::Number(654), + Token::Number(567), + Token::Dot, + Token::Number(32), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_1() { + _tokenisation_test( + "0x56 
0x00 0x00ff1 0x4ff2", + &[ + Token::Number(0x56), + Token::Number(0x00), + Token::Number(0xff1), + Token::Number(0x4ff2), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_1a() { + _tokenisation_test( + "0x 56 0x00 0x00f f1 0 x4ff2", + &[ + Token::Number(0), + Token::Name(String::from("x")), + Token::Number(56), + Token::Number(0x00), + Token::Number(0x00f), + Token::Name(String::from("f1")), + Token::Number(0), + Token::Name(String::from("x4ff2")), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_2() { + _tokenisation_test( + "0x56 0x00 0x00ff1 0x4ff2", + &[ + Token::Number(0x56), + Token::Number(0x00), + Token::Number(0xff1), + Token::Number(0x4ff2), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_2a() { + _tokenisation_test( + "0x 56 0x00 0x00f f1 0 x4ff2", + &[ + Token::Number(0), + Token::Name(String::from("x")), + Token::Number(56), + Token::Number(0x00), + Token::Number(0x00f), + Token::Name(String::from("f1")), + Token::Number(0), + Token::Name(String::from("x4ff2")), + ], + ); + } + + #[test] + #[ignore] + fn numbers_bin_1() { + _tokenisation_test( + "0b1111 0b000 0b0 0b1 0b1010100 0b001101", + &[ + Token::Number(0b1111), + Token::Number(0b000), + Token::Number(0b0), + Token::Number(0b1), + Token::Number(0b1010100), + Token::Number(0b001101), + ], + ); + } + + #[test] + #[ignore] + fn numbers_bin_1a() { + _tokenisation_test( + "0b1 111 0 b000 0 b 0 0b1 0b101 0100 0b001101", + &[ + Token::Number(0b1), + Token::Number(111), + Token::Number(0), + Token::Name(String::from("b000")), + Token::Number(0), + Token::Name(String::from("b")), + Token::Number(0), + Token::Number(0b1), + Token::Number(0b101), + Token::Number(100), + Token::Number(0b1101), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_bin_1() { + _tokenisation_test( + "0x11001 0b11001", + &[Token::Number(0x11001), Token::Number(0b11001)], + ); + } + + #[test] + #[ignore] + fn numbers_hex_bin_2() { + for s in [ + "0b00102", "0b013000", "0b010040", "0b050000", "0b66000", "0b017", 
"0b8", "0b90", + "0b01a0", "0b4b", "0b01c0", "0b0d", "0b01e0", "0b01f", + ] { + assert_eq!(tokenise(s).unwrap_err(), ""); + } + } + + #[test] + fn numbers_and_words_dec() { + assert_eq!( + tokenise("456hello").unwrap_err(), + "Unexpected word character in number token." + ); + } + + #[test] + #[ignore] + fn numbers_and_words_hex() { + assert_eq!( + tokenise("0x00free me").unwrap_err(), + "Unexpected word character in number token." + ); + } + + #[test] + #[ignore] + fn numbers_and_words_bin() { + assert_eq!( + tokenise("0b00ebrave").unwrap_err(), + "Unexpected word character in number token." + ); + } +} diff --git a/compiler/src/parser/types.rs b/compiler/src/parser/types.rs new file mode 100644 index 0000000..2b39862 --- /dev/null +++ b/compiler/src/parser/types.rs @@ -0,0 +1,286 @@ +use super::expressions::Expression; +use crate::macros::macros::r_panic; + +/// Clause type type variables: +/// - TC: TapeCell can be changed to implement 2D brainfuck, or other modifications +/// - OC: Opcode represents the valid Brainfuck Opcodes that we're generating (also used for 2D or other BF variants) +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Clause { + None, + DeclareVariable { + var: VariableTypeDefinition, + }, + DefineVariable { + var: VariableTypeDefinition, + value: Expression, + }, + DefineStruct { + name: String, + fields: Vec, + }, + AddAssign { + var: VariableTarget, + value: Expression, + self_referencing: bool, + }, + Assign { + var: VariableTarget, + value: Expression, + self_referencing: bool, + }, + AssertVariableValue { + var: VariableTarget, + // Some(constant) indicates we know the value, None indicates we don't know the value + // typically will either be used for assert unknown or assert 0 + value: Option, + }, + DrainLoop { + source: Expression, + targets: Vec, + block: Option>>, + // TODO: reassess this syntax + is_copying: bool, + }, + While { + var: VariableTarget, + block: Vec>, + }, + Output { + value: Expression, + }, 
+ Input { + var: VariableTarget, + }, + DefineFunction { + name: String, + // TODO: fix the type here, as function definitions don't actually need location specifiers and therefore don't need a tape cell type + arguments: Vec>, + block: Vec>, + }, + CallFunction { + function_name: String, + arguments: Vec, + }, + If { + condition: Expression, + if_block: Vec>, + }, + IfNot { + condition: Expression, + if_not_block: Vec>, + }, + IfElse { + condition: Expression, + if_block: Vec>, + else_block: Vec>, + }, + IfNotElse { + condition: Expression, + if_not_block: Vec>, + else_block: Vec>, + }, + Block(Vec>), + Brainfuck { + location_specifier: LocationSpecifier, + clobbered_variables: Vec, + operations: Vec>, + }, +} + +pub trait TapeCellLocation +where + Self: Sized + std::fmt::Display, +{ + /// optionally parse a memory location specifier + /// let g @(4,2) = 68; + /// or + /// let p @3 = 68; + fn parse_location_specifier(chars: &mut &[char]) -> Result, String>; + + /// safely cast a 2D or 1D location specifier into a 1D non-negative cell offset, + /// for use with struct fields + fn to_positive_cell_offset(&self) -> Result; +} + +// extended brainfuck opcodes to include mastermind code blocks +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub enum ExtendedOpcode { + Opcode(OC), + Block(Vec>), +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +/// the type of a variable according to the user, not validated yet as the parser does not keep track of types +pub enum VariableTypeReference { + Cell, + Struct(String), + Array(Box, usize), +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum LocationSpecifier { + None, + Cell(TC), + Variable(VariableTarget), +} +impl LocationSpecifier { + fn is_none(&self) -> bool { + matches!(self, LocationSpecifier::None) + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct VariableTypeDefinition { + pub name: String, + pub var_type: VariableTypeReference, + pub location_specifier: LocationSpecifier, + 
// Infinite {name: String, pattern: ???}, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct StructFieldTypeDefinition { + pub name: String, + pub field_type: VariableTypeReference, + pub location_offset_specifier: Option, +} +// let non_neg_location_specifier = match &var_def.location_specifier { +// LocationSpecifier::None => None, +// LocationSpecifier::Cell(l) => { +// // assert the y coordinate is 0 +// // r_assert!( +// // l.1 == 0, +// // "Struct field location specifiers do not support 2D grid cells: {var_def}" +// // ); +// r_assert!( +// l.0 >= 0, +// "Struct field location specifiers must be non-negative: {var_def}" +// ); +// Some(l.0 as usize) +// } +// LocationSpecifier::Variable(_) => { +// r_panic!( "Location specifiers in struct definitions must be relative, not variables: {var_def}") +// } +// }; +impl TryInto for VariableTypeDefinition +where + TC: TapeCellLocation, +{ + type Error = String; + + fn try_into(self) -> Result { + let location_offset_specifier = match &self.location_specifier { + LocationSpecifier::None => None, + LocationSpecifier::Cell(cell) => Some(match cell.to_positive_cell_offset() { + Ok(offset) => offset, + Err(err) => r_panic!("Cannot create struct field \"{self}\". {err}"), + }), + LocationSpecifier::Variable(_) => r_panic!( + "Location specifiers in struct definitions \ +must be relative, not variable." + ), + }; + Ok(StructFieldTypeDefinition { + name: self.name, + field_type: self.var_type, + location_offset_specifier, + }) + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum Reference { + NamedField(String), + Index(usize), +} + +/// Represents a list of subfield references after the `.` or `[x]` operators, e.g. 
`obj.h[6]` would have `['h', '[6]']` +// a bit verbose, not quite sure about this +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct VariableTargetReferenceChain(pub Vec); +/// Represents a target variable in an expression, this has no type information +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct VariableTarget { + pub name: String, + pub subfields: Option, + pub is_spread: bool, +} +impl VariableTarget { + /// convert a definition to a target for use with definition clauses (as opposed to declarations) + pub fn from_definition(var_def: &VariableTypeDefinition) -> Self { + VariableTarget { + name: var_def.name.clone(), + subfields: None, + is_spread: false, + } + } +} + +impl std::fmt::Display for VariableTypeReference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + VariableTypeReference::Cell => f.write_str("cell"), + VariableTypeReference::Struct(struct_name) => { + f.write_fmt(format_args!("struct {struct_name}")) + } + VariableTypeReference::Array(element_type, len) => { + f.write_fmt(format_args!("{element_type}[{len}]")) + } + } + } +} + +impl std::fmt::Display for VariableTypeDefinition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&format!("{} {}", self.var_type, self.name))?; + match &self.location_specifier { + LocationSpecifier::Cell(_) | LocationSpecifier::Variable(_) => { + f.write_str(&format!(" {}", self.location_specifier))? 
+ } + LocationSpecifier::None => (), + } + + Ok(()) + } +} + +impl std::fmt::Display for LocationSpecifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("@")?; + match self { + LocationSpecifier::Cell(cell) => f.write_str(&format!("{cell}"))?, + LocationSpecifier::Variable(var) => f.write_str(&format!("{var}"))?, + LocationSpecifier::None => (), + } + + Ok(()) + } +} + +impl std::fmt::Display for Reference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Reference::NamedField(subfield_name) => f.write_str(&format!(".{subfield_name}"))?, + Reference::Index(index) => f.write_str(&format!("[{index}]"))?, + } + + Ok(()) + } +} + +impl std::fmt::Display for VariableTarget { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_spread { + f.write_str("*")?; + } + f.write_str(&self.name)?; + if let Some(subfield_refs) = &self.subfields { + for ref_step in subfield_refs.0.iter() { + f.write_str(&format!("{ref_step}"))?; + } + } + + Ok(()) + } +} diff --git a/compiler/src/preprocessor.rs b/compiler/src/preprocessor.rs index 98542b7..bcf9ea8 100644 --- a/compiler/src/preprocessor.rs +++ b/compiler/src/preprocessor.rs @@ -1,8 +1,12 @@ // take in a file, read includes and simple conditionals and output a file with those includes pasted in // C-style +// TODO: add tests for this! 
+ use std::{collections::HashMap, path::PathBuf}; +use itertools::Itertools; + use crate::macros::macros::r_assert; pub fn preprocess(file_path: PathBuf) -> String { @@ -72,3 +76,57 @@ pub fn preprocess_from_memory( Ok(acc) } + +/// strips comments from input program, does not support anything else +pub fn strip_comments(raw_program: &str) -> String { + let mut stripped = raw_program + .lines() + .map(|line| line.split_once("//").map_or_else(|| line, |(left, _)| left)) + .join("\n"); + // join doesn't add a newline to the end, here we re-add it, this is probably unnecessary + if raw_program.ends_with("\n") { + stripped.push_str("\n"); + } + stripped +} + +#[cfg(test)] +pub mod preprocessor_tests { + use crate::preprocessor::strip_comments; + + #[test] + fn comments_0() { + assert_eq!(strip_comments(""), ""); + assert_eq!(strip_comments("\n\t\t\n"), "\n\t\t\n"); + } + + #[test] + fn comments_1() { + assert_eq!(strip_comments("hi//hello"), "hi"); + } + + #[test] + fn comments_2() { + assert_eq!(strip_comments("h//i // hello"), "h"); + } + + #[test] + fn comments_3() { + assert_eq!( + strip_comments( + r#" +hello // don't talk to me +second line +// third line comment +fourth line +"# + ), + r#" +hello +second line + +fourth line +"# + ); + } +} diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index c82b8b8..ddc5706 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -2,22 +2,28 @@ // black box testing #[cfg(test)] -pub mod tests { +pub mod black_box_tests { use crate::{ - brainfuck::{tests::run_code, BVMConfig}, - builder::{BrainfuckOpcodes, Builder, Opcode}, - compiler::Compiler, - parser::parse, - tokeniser::{tokenise, Token}, - MastermindConfig, + backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, + common::{ + BrainfuckBuilder, BrainfuckBuilderData, BrainfuckProgram, CellAllocator, + CellAllocatorData, OpcodeVariant, TapeCellVariant, + }, + }, + brainfuck::{bvm_tests::run_code, BrainfuckConfig}, + misc::{MastermindConfig, 
MastermindContext}, + parser::parser::parse_program, + preprocessor::strip_comments, }; // TODO: run test suite with different optimisations turned on const OPT_NONE: MastermindConfig = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, @@ -29,8 +35,8 @@ pub mod tests { optimise_generated_code: true, optimise_generated_all_permutations: false, optimise_cell_clearing: true, - optimise_variable_usage: true, - optimise_memory_allocation: true, + // optimise_variable_usage: true, + // optimise_memory_allocation: true, optimise_unreachable_loops: true, optimise_constants: true, optimise_empty_blocks: true, @@ -38,141 +44,176 @@ pub mod tests { enable_2d_grid: false, }; - const OPT_NONE_TILES: MastermindConfig = MastermindConfig { + const OPT_NONE_2D_TILES: MastermindConfig = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 3, - enable_2d_grid: false, + enable_2d_grid: true, }; - const OPT_NONE_SPIRAL: MastermindConfig = MastermindConfig { + const OPT_NONE_2D_SPIRAL: MastermindConfig = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, 
optimise_empty_blocks: false, memory_allocation_method: 2, - enable_2d_grid: false, + enable_2d_grid: true, }; - const OPT_NONE_ZIG_ZAG: MastermindConfig = MastermindConfig { + const OPT_NONE_2D_ZIG_ZAG: MastermindConfig = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 1, - enable_2d_grid: false, + enable_2d_grid: true, }; - const BVM_CONFIG_1D: BVMConfig = BVMConfig { + const BVM_CONFIG_1D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, enable_2d_grid: false, }; - const BVM_CONFIG_2D: BVMConfig = BVMConfig { + const BVM_CONFIG_2D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, enable_2d_grid: true, }; const TESTING_BVM_MAX_STEPS: usize = 100_000_000; - fn compile_and_run(program: String, input: String) -> Result { - // println!("{program}"); - // compile mastermind - let tokens: Vec = tokenise(&program)?; - // println!("{tokens:#?}"); - let clauses = parse(&tokens)?; - // println!("{clauses:#?}"); - let instructions = Compiler { config: &OPT_NONE } - .compile(&clauses, None)? 
- .finalise_instructions(false); - // println!("{instructions:#?}"); - let bf_program = Builder { config: &OPT_NONE }.build(instructions, false)?; + fn compile_and_run<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( + raw_program: &str, + input: &str, + ) -> Result + where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData: CellAllocator, + Vec: BrainfuckProgram, + { + let ctx = MastermindContext { config: OPT_NONE }; + let stripped_program = strip_comments(raw_program); + let clauses = parse_program::(&stripped_program)?; + let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); + let bf_program = ctx.ir_to_bf(instructions, None)?; let bfs = bf_program.to_string(); - // println!("{}", bfs); + // run generated brainfuck with input - Ok(run_code( - BVM_CONFIG_1D, - bfs, - input, - Some(TESTING_BVM_MAX_STEPS), - )) - } - - fn compile_program( - program: String, - config: Option<&MastermindConfig>, - ) -> Result, String> { - // println!("{program}"); - // compile mastermind - let tokens: Vec = tokenise(&program)?; - // println!("{tokens:#?}"); - let clauses = parse(&tokens)?; - // println!("{clauses:#?}"); - let instructions = Compiler { - config: config.unwrap_or(&OPT_NONE), - } - .compile(&clauses, None)? 
- .finalise_instructions(false); - // println!("{instructions:#?}"); - let bf_code = Builder { - config: config.unwrap_or(&OPT_NONE), - } - .build(instructions, false)?; - // println!("{}", bfs); + run_code(BVM_CONFIG_1D, &bfs, input, Some(TESTING_BVM_MAX_STEPS)) + } - Ok(bf_code) + fn compile_program<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( + raw_program: &str, + config: Option, + ) -> Result + where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData: CellAllocator, + Vec: BrainfuckProgram, + { + let ctx = MastermindContext { + config: config.unwrap_or(OPT_NONE), + }; + let stripped_program = strip_comments(raw_program); + let clauses = parse_program::(&stripped_program)?; + let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); + let bf_code = ctx.ir_to_bf(instructions, None)?; + + Ok(bf_code.to_string()) } - // #[test] - fn dummy_success_test() { - let program = String::from(""); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) + #[test] + fn empty_program_1() { + assert_eq!(compile_and_run::("", "").unwrap(), ""); } - // #[test] - fn dummy_compile_fail_test() { - let program = String::from(""); - let result = compile_program(program, None); - assert!(result.is_err()); + #[test] + fn empty_program_1a() { + assert_eq!(compile_and_run::(";;;", "").unwrap(), ""); } - // #[test] - fn dummy_code_test() { - let program = String::from(""); - let desired_code = String::from(""); - let code = compile_program(program, None).expect("").to_string(); - println!("{code}"); - assert_eq!(desired_code, code); + #[test] + fn empty_program_2() { + assert_eq!(compile_and_run::("{}", "").unwrap(), ""); + } - let input = String::from(""); - let desired_output = String::from(""); - let output = run_code(BVM_CONFIG_1D, code, input, None); - println!("{output}"); - assert_eq!(desired_output, 
output) + #[test] + fn empty_program_2a() { + assert_eq!( + compile_and_run::("{;;};", "").unwrap(), + "" + ); + } + + #[test] + fn empty_program_2b() { + assert_eq!( + compile_and_run::("{{{{}}}}", "").unwrap(), + "" + ); + } + + #[test] + fn empty_program_2c() { + assert_eq!( + compile_and_run::( + "{{}} {} {{{}{}}} {{{ { }{ }} {{ }{ }}} {{{ }{}}{{} {}}}}", + "" + ) + .unwrap(), + "" + ); + } + + #[test] + fn empty_program_2d() { + assert_eq!( + compile_and_run::( + "{{}} {} {{{}{}}} {{{ { }{ ;}}; {{ }{ }};} {{{; }{;};}{;{;};; {};}}}", + "" + ) + .unwrap(), + "" + ); + } + + #[test] + fn empty_program_3() { + assert_eq!(compile_and_run::(";", "").unwrap(), ""); + } + + #[test] + fn empty_program_3a() { + assert_eq!( + compile_and_run::(";;;;;;", "").unwrap(), + "" + ); + } + + #[test] + fn empty_program_3b() { + assert_eq!( + compile_and_run::(";;{;{;};};;;", "").unwrap(), + "" + ); } #[test] fn hello_1() { - let program = String::from( - " + let program = r#" cell h = 8; cell e = 5; cell l = 12; @@ -187,34 +228,32 @@ output l; output o; cell ten = 10; output ten; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\n" ); - let input = String::from(""); - let desired_output = String::from("hello\n"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")); } #[test] fn hello_2() { - let program = String::from( - " + let program = r#" output 'h'; output 'e'; output 'l'; output 'l'; output 'o'; output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\n" ); - let input = String::from(""); - let desired_output = String::from("hello\n"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")) } #[test] fn hello_3() { - let program = String::from( - r#"; + let program = r#" output 'h' ;;; // comment cell[5] EEL = "ello\n"; @@ -226,19 +265,16 @@ output EEL[4]; output '\n'; output 0; output 70; - "#, - ); - let input = String::from(""); - let desired_output = 
String::from("hello\n\n\0F"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\n\n\0F" + ) } #[test] fn hello_4() { - let program = String::from( - r#" + let program = r#" cell[4] str = [5, 12, 12, 15]; cell a = 'a' - 1; drain a into *str; @@ -247,48 +283,39 @@ output *str; output 46; output 10; output "What?"; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Hello.\nWhat?" ); - let input = String::from(""); - let desired_output = String::from("Hello.\nWhat?"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn hello_5() { - let program = String::from( - r#" + let program = r#" output "Hell"; output ['o', '.', '\n']; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Hello.\n" ); - let input = String::from(""); - let desired_output = String::from("Hello.\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn expressions_1() { - let program = String::from( - r#"; + let program = r#" output '@' + 256 + 1 + false + true + 'e' - '@'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "g" ); - let input = String::from(""); - let desired_output = String::from("g"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn expressions_2() { - let program = String::from( - r#"; + let program = r#" cell p = 9 - (true + true -(-7)); if not p { output "Hi friend!\n"; @@ -301,19 +328,16 @@ if q { } else { output "path b"; } - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Hi friend!\npath b" ); - let input = String::from(""); - let desired_output = String::from("Hi friend!\npath b"); - let output = compile_and_run(program, 
input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn expressions_3() { - let program = String::from( - r#"; + let program = r#" if 56 - 7 { output 'A'; } else { @@ -333,19 +357,16 @@ if not_a - 'a' { } else { output 'F'; } - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "ACb" ); - let input = String::from(""); - let desired_output = String::from("ACb"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn expressions_4() { - let program = String::from( - r#"; + let program = r#" cell x = 5; cell A = 'A'; @@ -355,102 +376,84 @@ drain 0 + x + 1 into A { output ' '; output A; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "666666 G" ); - let input = String::from(""); - let desired_output = String::from("666666 G"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_1() { - let program = String::from( - r#"; + let program = r#" cell x = 5; output '0' + x; x += 1; output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "56" ); - let input = String::from(""); - let desired_output = String::from("56"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_2() { - let program = String::from( - r#"; + let program = r#" cell x = 5; output '0' + x; x = x + 1; output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "56" ); - let input = String::from(""); - let desired_output = String::from("56"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_3() { - let program = String::from( - r#"; + let program = r#" cell x = 5; output '0' + x; x += 1 + x; output '0' + x; - "#, +"#; + 
assert_eq!( + compile_and_run::(program, "").unwrap(), + "5;" ); - let input = String::from(""); - let desired_output = String::from("5;"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_4() { - let program = String::from( - r#"; + let program = r#" cell x = 2; output '0' + x; x = x + x + x; output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "26" ); - let input = String::from(""); - let desired_output = String::from("26"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_5() { - let program = String::from( - r#"; + let program = r#" cell x = 2; x = (2 + 3) - ((x + 4) + 1) + 4 - (12) + (3 + 10); output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "3" ); - let input = String::from(""); - let desired_output = String::from("3"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_6() { - let program = String::from( - r#"; + let program = r#" cell[2] x = [4, 5]; x[0] = x[0] + 4; x[1] = x[1] - 3; @@ -458,19 +461,16 @@ x[1] = x[1] - 3; x[0] += '0'; x[1] += '0'; output *x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "82" ); - let input = String::from(""); - let desired_output = String::from("82"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_7() { - let program = String::from( - r#"; + let program = r#" cell[2] x = [1, 2]; x[0] = x[1] + 5; // 7 x[1] = x[0] + x[1]; // 9 @@ -478,107 +478,199 @@ x[1] = x[0] + x[1]; // 9 x[0] += '0'; x[1] += '0'; output *x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "79" ); - let input = String::from(""); - let desired_output = 
String::from("79"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_8() { - let program = String::from( - r#"; + let program = r#" cell x = 128; output x - 2; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "~" ); - let input = String::from(""); - let desired_output = String::from("~"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_8a() { - let program = String::from( - r#"; + let program = r#" cell x = 127; cell y = 64; x += y + y; output x + 'f' + 1; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "f" ); - let input = String::from(""); - let desired_output = String::from("f"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_8b() { - let program = String::from( - r#"; + let program = r#" cell x = 128; cell y = 64; x += y + y; output x + 'f'; - "#, - ); - let input = String::from(""); - let desired_output = String::from("f"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "f" + ) } #[test] - fn assignments_9() -> Result<(), String> { - let program = String::from( - r#"; + fn assignments_9() { + let program = r#" cell x = 128; x += 128; output x + 'f'; - "#, - ); - let input = String::from(""); - let desired_output = String::from("f"); - let code = compile_program(program, Some(&OPT_ALL))?; - assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, code.to_string(), input, None) - ); - Ok(()) +"#; + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); + println!("{code}"); + assert!(code.len() < 200); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "f"); } #[test] - fn 
assignments_9a() -> Result<(), String> { - let program = String::from( - r#"; + fn assignments_9a() { + let program = r#" cell x = 126; x += 2; x += 128; output x + 'f'; - "#, - ); - let input = String::from(""); - let desired_output = String::from("f"); - let code = compile_program(program, Some(&OPT_ALL))?; +"#; + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); + println!("{code}"); + assert!(code.len() < 200); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "f"); + } + + #[test] + fn increment_1() { + let program = r#" +cell x = 'h'; +output x; +++x; +output x; +"#; assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, code.to_string(), input, None) - ); - Ok(()) + compile_and_run::(program, "").unwrap(), + "hi" + ) + } + + #[test] + fn increment_2() { + let program = r#" +cell x = 'h'; +output x; +--x; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hg" + ) + } + + // TODO: add pre-increment to expressions? (probably not worth it) + #[test] + #[ignore] + fn increment_3() { + let program = r#" +cell x = 'a'; +output ++x; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "bb" + ) + } + + #[test] + #[ignore] + fn increment_3a() { + let program = r#" +cell x = 'a'; +output x; +output ++x + 2; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "adb" + ) + } + + #[test] + #[ignore] + fn increment_3b() { + let program = r#" +cell x = 'd'; +output --x; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "cc" + ) + } + + #[test] + #[ignore] + fn increment_3c() { + let program = r#" +cell x = 'd'; +output 4+--x; +output --x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "gb" + ) + } + + #[test] + #[ignore] + fn increment_4() { + let program = r#" +cell x = -1; +if ++x {output 'T';} +else {output 'F';} +output 'e' + ++x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Ff" + ) + } + + #[test] + #[ignore] + 
fn increment_4a() { + let program = r#" +cell x = 0; +if --x {output 'T';} +else {output 'F';} +output 'e' + x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Td" + ) } #[test] fn loops_1() { - let program = String::from( - " + let program = r#" cell n = '0'; cell a = 10; cell b = 1; @@ -593,17 +685,17 @@ drain a { b += 1; output 10; }; - ", - ); - let input = String::from(""); - let desired_output = String::from("0AB\n1ABB\n2ABBB\n3ABBBB\n4ABBBBB\n5ABBBBBB\n6ABBBBBBB\n7ABBBBBBBB\n8ABBBBBBBBB\n9ABBBBBBBBBB\n"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")) +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "0AB\n1ABB\n2ABBB\n3ABBBB\n4ABBBBB\n5ABBBBBB\n6ABBBBBBB\n7ABBBBBBBB\n8ABBBBBBBB\ +B\n9ABBBBBBBBBB\n" + ) } #[test] fn loops_2() { - let program = String::from( - " + let program = r#" cell a = 4; cell[6] b = [65, 65, 65, 65, 65, 1]; copy a into b[0] b[1] b[4] b[5] { @@ -619,30 +711,28 @@ copy a into b[0] b[1] b[4] b[5] { cell g = 5; drain g into a {output a;} - ", - ); - let input = String::from(""); - let desired_output = String::from("AABAA\nBBDAB\nCCGAC\nDDKAD\neefghi"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")) +"#; + assert_eq!( + compile_and_run::(program, "").expect(""), + "AABAA\nBBDAB\nCCGAC\nDDKAD\neefghi" + ) } #[test] fn loops_3() { - let program = String::from( - " + let program = r#" drain 40; output 'h'; - ", - ); - let input = String::from(""); - let desired_output = String::from("h"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")) +"#; + assert_eq!( + compile_and_run::(program, "").expect(""), + "h" + ) } #[test] fn ifs_1() { - let program = String::from( - " + let program = r#" cell x = 7; cell y = 9; @@ -672,19 +762,16 @@ if not z { }; output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "ACE\n" ); - let input = String::from(""); - let desired_output = String::from("ACE\n"); - let output = 
compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn ifs_2() { - let program = String::from( - " + let program = r#" cell x = 7; cell y = 9; @@ -716,38 +803,32 @@ if not z { } output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "ACE\n" ); - let input = String::from(""); - let desired_output = String::from("ACE\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn ifs_3() { - let program = String::from( - " + let program = r#" cell a = 5; if a { cell b = a + '0'; output b; } output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "5\n" ); - let input = String::from(""); - let desired_output = String::from("5\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn loops_and_ifs_1() { - let program = String::from( - " + let program = r#" cell n = '0'; cell a = 6; cell b; @@ -770,19 +851,16 @@ drain a { b += 1; output 10; }; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "0ABB\n1ABB\n2ABB\n3ABBBBBBBBBB\n4ABB\n5ABB\n" ); - let input = String::from(""); - let desired_output = String::from("0ABB\n1ABB\n2ABB\n3ABBBBBBBBBB\n4ABB\n5ABB\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_1() { - let program = String::from( - " + let program = r#" cell global_var = '0'; fn func_0(cell grape) { @@ -807,19 +885,16 @@ output global_var; output global_var; output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "010131\n" ); - let input = String::from(""); - let desired_output = String::from("010131\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - fn functions_2() -> Result<(), 
String> { - let program = String::from( - " + fn functions_2() { + let program = r#" cell global_var = '0'; fn func_0(cell grape) { @@ -843,23 +918,15 @@ func_0(global_var); output global_var; output 10; - ", - ); - let input = String::from(""); - let desired_output = String::from("01231\n"); - let code = compile_program(program, Some(&OPT_NONE))?.to_string(); - println!("{}", code); - let output = run_code(BVM_CONFIG_1D, code, input, None); - println!("{output}"); - assert_eq!(desired_output, output); - - Ok(()) +"#; + let code = compile_program::(program, None).unwrap(); + println!("{code}"); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "01231\n"); } #[test] fn functions_3() { - let program = String::from( - " + let program = r#" cell global_var = '0'; cell[2] global_vars = ['0', 64]; @@ -917,19 +984,16 @@ fn func_2(cell[4] think, cell green) { // output green; // green = 0; }; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "01202726631\n@1202726631\n" ); - let input = String::from(""); - let desired_output = String::from("01202726631\n@1202726631\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3a() { - let program = String::from( - r#" + let program = r#" cell[4] a = "AACD"; add_one(a[1]); output *a; @@ -937,19 +1001,16 @@ output *a; fn add_one(cell cel) { ++cel; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "ABCD" ); - let input = String::from(""); - let desired_output = String::from("ABCD"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3b() { - let program = String::from( - r#" + let program = r#" struct A {cell[3] arr;}; struct A a; a.arr[0] = '0'; @@ -964,19 +1025,16 @@ fn add_one_to_three(cell[3] t) { t[1] += 1; t[2] += 1; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "111" 
); - let input = String::from(""); - let desired_output = String::from("111"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3c() { - let program = String::from( - r#" + let program = r#" struct A {cell b; cell c;}; struct A a; a.b = '0'; @@ -991,19 +1049,16 @@ output a.c; fn add_one(cell t) { ++t; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "12" ); - let input = String::from(""); - let desired_output = String::from("12"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3d() { - let program = String::from( - r#" + let program = r#" struct A {cell b; cell c;}; struct A a; a.b = '0'; @@ -1029,19 +1084,16 @@ fn add_one(struct A t) { ++t.b; ++t.c; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "12\n23" ); - let input = String::from(""); - let desired_output = String::from("12\n23"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3e() { - let program = String::from( - r#" + let program = r#" struct A {cell b; cell c;}; struct A a; a.b = '0'; @@ -1068,20 +1120,16 @@ fn add_one(struct A t, cell a) { ++t.c; ++a; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "12\n33" ); - let input = String::from(""); - let desired_output = String::from("12\n33"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - #[should_panic] fn functions_3f() { - let program = String::from( - r#" + let program = r#" struct A {cell b; cell c;}; struct A a; a.b = '0'; @@ -1112,38 +1160,32 @@ fn add_one(struct A t, cell a) { fn add_one(struct A tfoaishjdf, cell aaewofjas) { output "hello"; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap_err(), + 
"Cannot define a function with the same signature more than once in the same scope: \"add_one\"" ); - let input = String::from(""); - let desired_output = String::from("12\n33"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_4() { - let program = String::from( - r#" + let program = r#" fn hello() { output "hello"; } hello(); output 10; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\n" ); - let input = String::from(""); - let desired_output = String::from("hello\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn function_overloads_1() { - let program = String::from( - r#" + let program = r#" fn hello(cell h) { output "hello: "; output h; @@ -1157,19 +1199,16 @@ output 10; cell g = 'g'; hello(g); output 10; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\nhello: g\n" ); - let input = String::from(""); - let desired_output = String::from("hello\nhello: g\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn function_overloads_1a() { - let program = String::from( - r#" + let program = r#" fn hello() { output "hello"; } @@ -1184,36 +1223,30 @@ output 10; cell g = 'g'; hello(g); output 10; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\nhello: g\n" ); - let input = String::from(""); - let desired_output = String::from("hello\nhello: g\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn input_1() { - let program = String::from( - " + let program = r#" cell b; input b; ++b; output b; -", +"#; + assert_eq!( + compile_and_run::(program, "A").unwrap(), + "B" ); - let input = String::from("A"); - let desired_output = String::from("B"); - let 
output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn input_2() { - let program = String::from( - r#" + let program = r#" cell[3] b; input b[0]; input b[1]; @@ -1228,19 +1261,16 @@ b[2]+=1; output b[2]; output b[1]; output b[0]; -"#, +"#; + assert_eq!( + compile_and_run::(program, "ABC").unwrap(), + "ABC\nDDD" ); - let input = String::from("ABC"); - let desired_output = String::from("ABC\nDDD"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn memory_1() { - let program = String::from( - r#" + let program = r#" cell[3] b = "Foo"; fn inc(cell h, cell g) { @@ -1257,19 +1287,16 @@ output 10; cell c = -1; inc(c, c); output c; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "FooFpp\nZ" ); - let input = String::from(""); - let desired_output = String::from("FooFpp\nZ"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn memory_2() { - let program = String::from( - r#" + let program = r#" cell[3] b = [1, 2, 3]; fn drain_h(cell h) { @@ -1292,19 +1319,16 @@ cell u = 'a' - 1; cell[5] v = [8, 5, 12, 12, 15]; drain_into(u, v); output *v; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hhh hh hello" ); - let input = String::from(""); - let desired_output = String::from("hhh hh hello"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn blocks_1() { - let program = String::from( - r#" + let program = r#" {{{{{{{ cell g = 0 + 5 + (-(-5)); output "Freidns"; @@ -1312,19 +1336,16 @@ output *v; output g; } }}}}}}} -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Freidns\n" ); - let input = String::from(""); - let desired_output = String::from("Freidns\n"); - let output = compile_and_run(program, 
input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn blocks_2() { - let program = String::from( - r#" + let program = r#" cell f = 'f'; output f; { @@ -1332,19 +1353,16 @@ output f; output f; } output f; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "fFf" ); - let input = String::from(""); - let desired_output = String::from("fFf"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn dimensional_arrays_1() { - let program = String::from( - r#" + let program = r#" cell[4][3] g; g[0][0] = 5 + '0'; g[0][1] = 4 + '0'; @@ -1374,19 +1392,16 @@ output g[2][0]; output g[2][1]; output g[2][2]; output g[2][3]; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "543112320003" ); - let input = String::from(""); - let desired_output = String::from("543112320003"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_1() { - let program = String::from( - r#"; + let program = r#" struct AA { cell green; cell yellow; @@ -1400,19 +1415,16 @@ a.green = 6; a.yellow = 4; output '0' + a.green; output '0' + a.yellow; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "0064" ); - let input = String::from(""); - let desired_output = String::from("0064"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_2() { - let program = String::from( - r#"; + let program = r#" struct AA { cell green; cell yellow; @@ -1427,19 +1439,16 @@ a.green = 5; a.yellow = 2; output '0' + a.green; output '0' + a.yellow; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "3452" ); - let input = String::from(""); - let desired_output = String::from("3452"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - 
assert_eq!(desired_output, output) } #[test] fn structs_3() { - let program = String::from( - r#"; + let program = r#" struct AA { cell green; cell yellow; @@ -1456,19 +1465,16 @@ input_AA(a); output a.yellow; output a.green; - "#, +"#; + assert_eq!( + compile_and_run::(program, "gh").unwrap(), + "hg" ); - let input = String::from("gh"); - let desired_output = String::from("hg"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_3a() { - let program = String::from( - r#"; + let program = r#" struct AA a; fn input_AA(struct AA bbb) { @@ -1489,19 +1495,16 @@ struct AA { cell green; cell yellow; } - "#, +"#; + assert_eq!( + compile_and_run::(program, "gh").unwrap(), + "hg" ); - let input = String::from("gh"); - let desired_output = String::from("hg"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_3b() { - let program = String::from( - r#"; + let program = r#" struct AA a; fn input_AA(struct AA bbb) { @@ -1527,19 +1530,16 @@ struct AA { cell green; cell yellow; } - "#, +"#; + assert_eq!( + compile_and_run::(program, "ghpalindrome").unwrap(), + "nhg" ); - let input = String::from("ghpalindrome"); - let desired_output = String::from("nhg"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_4a() { - let program = String::from( - r#"; + let program = r#" struct AA a; input a.green; input a.yellow; @@ -1561,19 +1561,16 @@ output a.reds[1]; output a.reds[2]; output a.reds[3]; output '\n'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "hellow").unwrap(), + "helowl\n" ); - let input = String::from("hellow"); - let desired_output = String::from("helowl\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn 
structs_4b() { - let program = String::from( - r#"; + let program = r#" struct AA a; input a.green; input a.yellow; @@ -1589,19 +1586,16 @@ output *a.reds; output a.yellow; output a.green; output '\n'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "gy0123").unwrap(), + "0123yg\n" ); - let input = String::from("gy0123"); - let desired_output = String::from("0123yg\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_4c() { - let program = String::from( - r#"; + let program = r#" struct AA a; input a.green; input a.yellow; @@ -1627,20 +1621,16 @@ output *a.sub.blues; output a.yellow; output a.green; output '\n'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "gy-+t").unwrap(), + "t-+yg\n" ); - let input = String::from("gy-+t"); - let desired_output = String::from("t-+yg\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - #[should_panic] fn structs_4d() { - let program = String::from( - r#"; + let program = r#" struct AA a; input *a.reds; @@ -1651,19 +1641,16 @@ struct AA { output a.reds[4]; output '\n'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "0123a").unwrap_err(), + "Index \"[4]\" must be less than array length (4)." 
); - let input = String::from("0123a"); - let desired_output = String::from("a\n"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_5() { - let program = String::from( - r#"; + let program = r#" struct AA { cell green; } @@ -1674,19 +1661,16 @@ as[1].green = 3; output '0' + as[0].green; output '0' + as[1].green; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "53" ); - let input = String::from(""); - let desired_output = String::from("53"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_5a() { - let program = String::from( - r#" + let program = r#" struct AAA[2] as; as[0].green = 5; as[1].green = 3; @@ -1697,19 +1681,16 @@ output '0' + as[1].green; struct AAA { cell green; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "53" ); - let input = String::from(""); - let desired_output = String::from("53"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_6() { - let program = String::from( - r#"; + let program = r#" struct AA { cell green; } @@ -1734,19 +1715,15 @@ input_AAs(as); output '0' + b.green; output as[0].green; output as[1].green; - "#, - ); - let input = String::from("tr"); - let desired_output = String::from("HI\n6tr"); - let output = compile_and_run(program, input).expect(""); +"#; + let output = compile_and_run::(program, "tr").expect(""); println!("{output}"); - assert_eq!(desired_output, output) + assert_eq!(output, "HI\n6tr"); } #[test] fn structs_7() { - let program = String::from( - r#"; + let program = r#" struct BB { cell green; } @@ -1782,19 +1759,15 @@ output as[1].green; output as[1].bbb[0].green; output as[1].bbb[1].green; output as[1].bbb[2].green; - "#, - ); - let input = String::from("abcdefgh"); - let desired_output = 
String::from("HI\ngabchdef"); - let output = compile_and_run(program, input).expect(""); +"#; + let output = compile_and_run::(program, "abcdefgh").expect(""); println!("{output}"); - assert_eq!(desired_output, output) + assert_eq!(output, "HI\ngabchdef"); } #[test] fn structs_7a() { - let program = String::from( - r#"; + let program = r#" struct BB { cell green @2; } @@ -1830,19 +1803,15 @@ output as[1].green; output as[1].bbb[0].green; output as[1].bbb[1].green; output as[1].bbb[2].green; - "#, - ); - let input = String::from("abcdefgh"); - let desired_output = String::from("HI\ngabchdef"); - let output = compile_and_run(program, input).expect(""); +"#; + let output = compile_and_run::(program, "abcdefgh").expect(""); println!("{output}"); - assert_eq!(desired_output, output) + assert_eq!(output, "HI\ngabchdef"); } #[test] fn structs_bf_1() { - let program = String::from( - r#"; + let program = r#" struct Frame { cell marker @3; cell value @0; @@ -1865,22 +1834,16 @@ vec1.frames[2].value = 'l'; bf @2 { [>.>>>] } - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "jkl" ); - let input = String::from(""); - let desired_output = String::from("jkl"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - // TODO: fix the r_panic macro that makes this error have unescaped quotes in it (weird) - // #[should_panic(expected = r#"Subfields "marker" and "temp_cells" overlap in struct."#)] - #[should_panic] fn structs_bf_1a() { - let program = String::from( - r#"; + let program = r#" struct Frame { cell marker @2; cell value @0; @@ -1888,22 +1851,16 @@ struct Frame { } struct Frame f; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap_err(), + "Subfields \"marker\" and \"temp_cells\" overlap in struct." 
); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - // TODO: fix the r_panic macro that makes this error have unescaped quotes in it (weird) - // #[should_panic(expected = r#"Subfields "marker" and "temp_cells" overlap in struct."#)] - #[should_panic] fn structs_bf_1b() { - let program = String::from( - r#"; + let program = r#" struct Frame { cell marker @-2; cell value @0; @@ -1911,20 +1868,16 @@ struct Frame { } struct Frame f; - "#, +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Cannot create struct field \"cell marker @-2\". Expected non-negative cell offset." ); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - #[should_panic] fn structs_bf_1c() { - let program = String::from( - r#"; + let program = r#" struct G { cell a @1; cell b @1; @@ -1936,19 +1889,16 @@ g.b = 'b'; output g.a; output g.b; - "#, +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Subfields \"a\" and \"b\" overlap in struct." 
); - let input = String::from(""); - let desired_output = String::from("ab"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_bf_2() { - let program = String::from( - r#"; + let program = r#" struct Green { // no @0 cell cell blue @1; @@ -1960,146 +1910,122 @@ output g.blue; bf @4 { >.< } - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "55" ); - let input = String::from(""); - let desired_output = String::from("55"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_0() { - let program = String::from( - r#"; + let program = r#" output '0' + sizeof(cell); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "1" ); - let input = String::from(""); - let desired_output = String::from("1"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_0a() { - let program = String::from( - r#"; + let program = r#" output '0' + sizeof(cell[5]); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "5" ); - let input = String::from(""); - let desired_output = String::from("5"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_0b() { - let program = String::from( - r#"; + let program = r#" cell a; cell b[4]; output '0' + sizeof(a); output '0' + sizeof(b); output '0' + sizeof(b[2]); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "141" ); - let input = String::from(""); - let desired_output = String::from("141"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_1() { - let program = String::from( - r#"; + let program = r#" 
struct Green { cell blue; } let s = sizeof(struct Green); output '0' + s; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "1" ); - let input = String::from(""); - let desired_output = String::from("1"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_1a() { - let program = String::from( - r#"; + let program = r#" struct Green { cell blue; } let s = sizeof(struct Green[3]); output '0' + s; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "3" ); - let input = String::from(""); - let desired_output = String::from("3"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_1b() { - let program = String::from( - r#"; + let program = r#" struct Green { cell blue; } let s = sizeof(struct Green[3][2]); output '0' + s; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "6" ); - let input = String::from(""); - let desired_output = String::from("6"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_2() { - let program = String::from( - r#"; + let program = r#" struct Green { cell blue; cell red; } struct Green g; output '0' + sizeof(g); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "2" ); - let input = String::from(""); - let desired_output = String::from("2"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_3() { - let program = String::from( - r#"; + let program = r#" struct Green { cell blue; cell[5] red; @@ -2110,20 +2036,17 @@ output '0' + sizeof(g) - 13; output '0' + sizeof(g[0].blue); output '0' + sizeof(g[0].red); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "115" 
); - let input = String::from(""); - let desired_output = String::from("115"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_4() { - let program = String::from( - r#"; + let program = r#" struct Green { cell blue @2; } @@ -2131,20 +2054,17 @@ struct Green[3] g; output '0' + sizeof(struct Green); output '0' + sizeof(g); output '0' + sizeof(g[2].blue) - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "391" ); - let input = String::from(""); - let desired_output = String::from("391"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_5() { - let program = String::from( - r#"; + let program = r#" struct Blue { cell[2] blues; } @@ -2162,19 +2082,16 @@ struct Green[3] g; output '0' + sizeof(struct Green); output '0' + sizeof(g) - 17; output '0' + sizeof(g[2].blue) - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "23612" ); - let input = String::from(""); - let desired_output = String::from("23612"); - let output = compile_and_run(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - fn memory_specifiers_1() -> Result<(), String> { - let program = String::from( - r#" + fn memory_specifiers_1() { + let program = r#" cell foo @3 = 2; { cell n = 12; @@ -2184,323 +2101,166 @@ cell foo @3 = 2; } } output foo; -"#, - ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); +"#; + let code = compile_program::(program, None).unwrap(); assert_eq!(code, ">>>++<<<++++++++++++[->>>++++++++++<<<][-]>>>."); - assert_eq!(output, "z"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "z"); } #[test] - fn memory_specifiers_2() -> 
Result<(), String> { - let program = String::from( - r#" + fn memory_specifiers_2() { + let program = r#" cell a @5 = 4; cell foo @0 = 2; cell b = 10; -"#, - ); - let code = compile_program(program, None)?.to_string(); +"#; + let code = compile_program::(program, None).unwrap(); println!("{code}"); - assert!(code.starts_with(">>>>>++++<<<<<++>++++++++++")); - Ok(()) } #[test] - fn memory_specifiers_3() -> Result<(), String> { - let program = String::from( - r#" + fn memory_specifiers_3() { + let program = r#" cell a @1 = 1; cell foo @0 = 2; cell b = 3; -"#, - ); - let code = compile_program(program, None)?.to_string(); +"#; + let code = compile_program::(program, None).unwrap(); println!("{code}"); - assert!(code.starts_with(">+<++>>+++")); - Ok(()) } #[test] - fn memory_specifiers_4() -> Result<(), String> { - let program = String::from( - r#" -cell a @1,2 = 1; -cell foo @0 = 2; -cell b = 3; -"#, - ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - assert!(code.starts_with(">^^++++")); - Ok(()) - } - - #[test] - fn memory_specifiers_5() -> Result<(), String> { - let program = String::from( - r#" -cell[4][3] g @1,2; -g[0][0] = 1; -g[1][1] = 2; -g[2][2] = 3; -cell foo @0 = 2; -cell b = 3; -"#, - ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - assert!(code.starts_with(">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++")); - Ok(()) - } - - #[test] - fn memory_specifiers_6() { - let program = String::from( - r#" + fn memory_specifiers_4() { + let program = r#" cell a @1 = 1; cell foo @1 = 2; cell b = 3; -"#, - ); - let code = compile_program(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @1,0 conflicts with another allocation")); - } - - #[test] - fn memory_specifiers_7() { - let program = String::from( - r#" -cell a @1,3 = 1; -cell foo @1,3 = 2; -cell b = 3; -"#, - ); - let code = compile_program(program, None); - 
assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @1,3 conflicts with another allocation")); - } - - #[test] - fn memory_specifiers_8() { - let program = String::from( - r#" -cell a @2 = 1; -cell foo @2,0 = 2; -cell b = 3; -"#, - ); - let code = compile_program(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @2,0 conflicts with another allocation")); - } - - #[test] - fn memory_specifiers_9() { - let program = String::from( - r#" -cell a @2,4 = 1; -cell[4] b @0,4; -"#, +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Location specifier @1 conflicts with another allocation" ); - let code = compile_program(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); } #[test] - fn variable_location_specifiers_1() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_1() { + let program = r#" cell a = 'h'; bf @a {.} -"#, +"#; + assert_eq!( + compile_and_run::(program, "wxy").unwrap(), + "h" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from("wxy"); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "h"); - Ok(()) } #[test] - fn variable_location_specifiers_1a() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_1a() { + let program = r#" cell[100] _; cell a = 'h'; cell[4] b; bf @a {.} -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "h" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "h"); - Ok(()) } #[test] - fn 
variable_location_specifiers_2() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_2() { + let program = r#" struct Test {cell[3] a @0; cell b;} struct Test t; input *t.a; bf @t.a { [+.>] } -"#, - ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from("wxy"); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); +"#; + let code = compile_program::(program, None).unwrap(); assert_eq!(code, ",>,>,<<[+.>]"); - assert_eq!(output, "xyz"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "wxy", None).unwrap(), "xyz"); } #[test] - fn variable_location_specifiers_2a() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_2a() { + let program = r#" struct Test {cell[3] a @0; cell b;} struct Test t; input *t.a; bf @t { [+.>] } -"#, - ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from("wxy"); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); +"#; + let code = compile_program::(program, None).unwrap(); assert_eq!(code, ",>,>,<<[+.>]"); - assert_eq!(output, "xyz"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "wxy", None).unwrap(), "xyz"); } #[test] - fn variable_location_specifiers_3() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_3() { + let program = r#" cell[5] f @6 = "abcde"; bf @f[2] clobbers *f {.+++.} output 10; output *f; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "cf\nabfde" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "cf\nabfde"); - Ok(()) } #[test] - fn variable_location_specifiers_3a() -> Result<(), String> { - let program = 
String::from( - r#" + fn variable_location_specifiers_3a() { + let program = r#" cell[4] f @8 = "xyz "; bf @f {[.>]} -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "xyz " ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "xyz "); - Ok(()) } #[test] - fn variable_location_specifiers_4() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4() { + let program = r#" fn func(cell g) { bf @g {+.-} } cell a = '5'; func(a); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "6" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "6"); - Ok(()) } #[test] - fn variable_location_specifiers_4a() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4a() { + let program = r#" fn func(cell g) { bf @g {+.-} } cell[3] a = "456"; func(a[1]); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "6" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "6"); - Ok(()) } #[test] - fn variable_location_specifiers_4b() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4b() { + let program = r#" fn func(cell g) { bf @g {+.-} } @@ -2511,22 +2271,16 @@ a.r[0] = '4'; a.r[1] = '5'; a.r[2] = '6'; func(a.r[1]); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "6" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = 
String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "6"); - Ok(()) } #[test] - fn variable_location_specifiers_4c() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4c() { + let program = r#" fn func(struct H h) { bf @h {+.-} } @@ -2537,22 +2291,16 @@ a.r[0] = '4'; a.r[1] = '5'; a.r[2] = '6'; func(a); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "5" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "5"); - Ok(()) } #[test] - fn variable_location_specifiers_4d() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4d() { + let program = r#" fn func(cell[2] g) { bf @g {+.-} } @@ -2563,105 +2311,84 @@ struct H a; a.jj.j[0] = '3'; a.jj.j[1] = '4'; func(a.jj.j); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "4" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "4"); - Ok(()) } #[test] - fn assertions_1() -> Result<(), String> { - let program = String::from( - r#" + fn assertions_1() { + let program = r#" cell a @0 = 5; output a; assert a equals 2; a = 0; output a; -"#, - ); - let code = compile_program(program, Some(&OPT_ALL))?.to_string(); +"#; + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); - assert!(code.starts_with("+++++.--.")); - Ok(()) } #[test] - fn assertions_2() -> Result<(), String> { - let program = String::from( - r#" + fn assertions_2() { + let program = r#" cell a @0 = 2; output a; assert a unknown; a = 0; output a; -"#, - ); - let code = 
compile_program(program, Some(&OPT_ALL))?.to_string(); +"#; + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); - assert!(code.starts_with("++.[-].")); - Ok(()) } #[test] - fn inline_brainfuck_1() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_1() { + let program = r#" bf { ,.[-] +[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+. } -"#, - ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - +"#; + let code = compile_program::(program, None).unwrap(); assert_eq!( code, ",.[-]+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." ); - - let output = run_code(BVM_CONFIG_1D, code, String::from("~"), None); - assert_eq!(output, "~Hello, World!"); - Ok(()) + assert_eq!( + run_code(BVM_CONFIG_1D, &code, "~", None).unwrap(), + "~Hello, World!" + ); } #[test] - fn inline_brainfuck_2() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_2() { + let program = r#" // cell a @0; // cell b @1; bf @3 { ,.[-] +[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+. } -"#, - ); - let code = compile_program(program, None)?.to_string(); +"#; + let code = compile_program::(program, None).unwrap(); println!("{code}"); - assert!(code.starts_with( ">>>,.[-]+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." )); - - let output = run_code(BVM_CONFIG_1D, code, String::from("~"), None); - assert_eq!(output, "~Hello, World!"); - Ok(()) + assert_eq!( + run_code(BVM_CONFIG_1D, &code, "~", None).unwrap(), + "~Hello, World!" 
+ ); } #[test] - fn inline_brainfuck_3() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_3() { + let program = r#" cell[3] str @0; bf @0 clobbers *str { @@ -2679,22 +2406,16 @@ bf @0 clobbers *str { <<< } assert *str equals 0; -"#, - ); - let code = compile_program(program, None)?.to_string(); +"#; + let code = compile_program::(program, None).unwrap(); println!("{code}"); - assert!(code.starts_with(",>,>,<<[+>]<<<[.[-]>]<<<")); - - let output = run_code(BVM_CONFIG_1D, code, String::from("HEY"), None); - assert_eq!(output, "IFZ"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "HEY", None).unwrap(), "IFZ"); } #[test] - fn inline_brainfuck_4() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_4() { + let program = r#" bf { // enters a line of user input // runs some embedded mastermind for each character @@ -2711,20 +2432,20 @@ bf { ,---------- ] } -"#, - ); - let code = compile_program(program, None)?.to_string(); +"#; + let code = compile_program::(program, None).unwrap(); println!("{code}"); - - let output = run_code(BVM_CONFIG_1D, code, String::from("line of input\n"), None); - assert_eq!(output, "lmijnoef !opfg !ijnopquvtu"); - Ok(()) + assert!(code.starts_with(",----------[++++++++++")); + assert!(code.ends_with("[-],----------]")); + assert_eq!( + run_code(BVM_CONFIG_1D, &code, "line of input\n", None).unwrap(), + "lmijnoef !opfg !ijnopquvtu" + ); } #[test] - fn inline_brainfuck_5() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_5() { + let program = r#" // external function within the same file, could be tricky to implement fn quote(cell n) { // H 'H' @@ -2749,20 +2470,20 @@ bf { ,---------- ] } -"#, - ); - let code = compile_program(program, None)?.to_string(); +"#; + let code = compile_program::(program, None).unwrap(); println!("{code}"); - - let output = run_code(BVM_CONFIG_1D, code, String::from("hello\n"), None); - assert_eq!(output, 
"'h'\n'e'\n'l'\n'l'\n'o'\n"); - Ok(()) + assert!(code.starts_with(",----------[++++++++++")); + assert!(code.ends_with("[-],----------]")); + assert_eq!( + run_code(BVM_CONFIG_1D, &code, "hello\n", None).unwrap(), + "'h'\n'e'\n'l'\n'l'\n'o'\n" + ); } #[test] - fn inline_brainfuck_6() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_6() { + let program = r#" cell b = 4; bf { @@ -2772,96 +2493,83 @@ bf { } ++-- } -"#, +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "No variable found in scope with name \"b\"." ); - let result = compile_program(program, None); - assert!(result.is_err()); - - Ok(()) } #[test] - fn inline_brainfuck_7() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_7() { + let program = r#" bf { ,>,>, << {{{{{{cell g @5 = 1;}}}}}} } - "#, +"#; + assert_eq!( + compile_program::(program, None).unwrap(), + ",>,>,<<>>>>>+[-]<<<<<" ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - assert_eq!(code, ",>,>,<<>>>>>+[-]<<<<<"); - Ok(()) } - #[test] - fn inline_2d_brainfuck() -> Result<(), String> { - let program = String::from( - r#" - bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.} - "#, - ); - let code = compile_program(program, None)?.to_string(); + #[test] + fn inline_2d_brainfuck() { + let program = r#" +bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.} +"#; + let code = compile_program::(program, None).unwrap(); assert_eq!( code, ",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+." ); - - let output = run_code(BVM_CONFIG_2D, code, String::from("~"), None); - assert_eq!(output, "~Hello, World!"); - Ok(()) + assert_eq!( + run_code(BVM_CONFIG_2D, &code, "~", None).unwrap(), + "~Hello, World!" 
+ ); } + #[test] - #[should_panic(expected = "Invalid Inline Brainfuck Characters in vvstvv")] fn invalid_inline_2d_brainfuck() { - let program = String::from( - r#" - bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^^+.} - "#, + let program = r#" +bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^^+.} +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Unexpected character `s` in Brainfuck clause." ); - let _result = compile_program(program, None); } #[test] - #[should_panic(expected = "2D Brainfuck currently disabled")] fn inline_2d_brainfuck_disabled() { - run_code( - BVM_CONFIG_1D, - String::from( + assert_eq!( + run_code( + BVM_CONFIG_1D, ",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.", - ), - String::from("~"), - None, + "~", + None, + ) + .unwrap_err(), + "2D Brainfuck currently disabled" ); } + #[test] - fn constant_optimisations_1() -> Result<(), String> { - let program = String::from( - " + fn constant_optimisations_1() { + let program = r#" output 'h'; - ", - ); - let input = String::from(""); - let desired_output = String::from("h"); - - let code = compile_program(program, Some(&OPT_ALL))?; - println!("{}", code.clone().to_string()); - assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, code.to_string(), input, None) - ); - - Ok(()) +"#; + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); + println!("{code}"); + assert!(code.len() < 35); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "h"); } #[test] - fn constant_optimisations_2() -> Result<(), String> { - let program = String::from( - r#" + fn constant_optimisations_2() { + let program = r#" cell[15] arr @1; cell a = 'G'; cell b = a + 45; @@ -2869,44 +2577,108 @@ output b; b -= 43; output b; output a + 3; - "#, - ); - let input = String::from(""); - let desired_output = String::from("tIJ"); - - let code = compile_program(program, Some(&OPT_ALL))?.to_string(); 
- println!("{}", code); - assert_eq!(desired_output, run_code(BVM_CONFIG_1D, code, input, None)); - - Ok(()) +"#; + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); + println!("{code}"); + assert!(code.len() < 400); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "tIJ"); } + + // TODO: remove the need for this #[test] - #[should_panic(expected = "Memory Allocation Method not implemented")] fn unimplemented_memory_allocation() { - let program = String::from( - r#" - cell[15] arr @1; - cell a = 'G'; - "#, - ); + let program = r#" +cell[15] arr @1; +cell a = 'G'; +"#; let cfg = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 128, enable_2d_grid: false, }; - let _code = compile_program(program, Some(&cfg)); + assert_eq!( + compile_program::(program, Some(cfg)).unwrap_err(), + "Memory allocation method 128 not implemented." 
+ ); + } + + #[test] + fn memory_specifiers_2d_1() { + let program = r#" +cell a @(1, 2) = 1; +cell foo @0 = 2; +cell b = 3; +"#; + assert_eq!( + compile_program::(program, None).unwrap(), + ">^^++++" + ); } + #[test] - fn tiles_memory_allocation_1() -> Result<(), String> { - let program = String::from( - r#" + fn memory_specifiers_2d_2() { + let program = r#" +cell[4][3] g @(1, 2); +g[0][0] = 1; +g[1][1] = 2; +g[2][2] = 3; +cell foo @0 = 2; +cell b = 3; +"#; + assert_eq!( + compile_program::(program, None).unwrap(), + ">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++" + ); + } + + #[test] + fn memory_specifiers_2d_3() { + let program = r#" +cell a @(1, 3) = 1; +cell foo @(1, 3) = 2; +cell b = 3; +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Location specifier @(1, 3) conflicts with another allocation" + ); + } + + #[test] + fn memory_specifiers_2d_4() { + let program = r#" +cell a @2 = 1; +cell foo @(2, 0) = 2; +cell b = 3; +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Location specifier @(2, 0) conflicts with another allocation" + ); + } + + #[test] + fn memory_specifiers_2d_5() { + let program = r#" +cell a @(2, 4) = 1; +cell[4] b @(0, 4); +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Location specifier @(0, 4) conflicts with another allocation" + ); + } + + #[test] + fn tiles_memory_allocation_1() { + let program = r#" cell a = 1; cell b = 1; cell c = 1; @@ -2916,19 +2688,15 @@ cell f = 1; cell h = 1; cell i = 1; cell j = 1; - "#, +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(), + "+vv+^^+>vv+^+^+" ); - let desired_output = String::from("+vv+^^+>vv+^+^+"); - - let code = compile_program(program, Some(&OPT_NONE_TILES))?.to_string(); - assert_eq!(desired_output, code); - - Ok(()) } #[test] - fn tiles_memory_allocation_2() -> Result<(), String> { - let program = String::from( - r#" + fn tiles_memory_allocation_2() { + let program = r#" cell a = '1'; cell b = '2'; 
cell c = '3'; @@ -2947,38 +2715,32 @@ output f; output g; output h; output i; - "#, +"#; + let code = + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(); + println!("{code}"); + assert!(code.contains("v") || code.contains("^")); + assert_eq!( + run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), + "123456789" ); - let input = String::from(""); - let desired_output = String::from("123456789"); - - let code = compile_program(program, Some(&OPT_NONE_TILES))?.to_string(); - println!("{}", code); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - - Ok(()) } #[test] fn tiles_memory_allocation_3() { - let program = String::from( - r#" -cell a @2,4 = 1; -cell[4] b @0,4; -"#, + let program = r#" +cell a @(2, 4) = 1; +cell[4] b @(0, 4); +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap_err(), + "Location specifier @(0, 4) conflicts with another allocation" ); - let code = compile_program(program, Some(&OPT_NONE_TILES)); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); } #[test] - fn tiles_memory_allocation_4() -> Result<(), String> { - let program = String::from( - r#" + fn tiles_memory_allocation_4() { + let program = r#" cell a @2 = 1; cell[4] b; a = '5'; @@ -2991,20 +2753,17 @@ output b[1]; output b[2]; output b[3]; output a; -"#, - ); - let code = compile_program(program, Some(&OPT_NONE_TILES))?.to_string(); - println!("{}", code); - let input = String::from(""); - let desired_output = String::from("12345"); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - Ok(()) +"#; + let code = + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(); + println!("{code}"); + assert!(code.contains("v") || code.contains("^")); + assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); } #[test] - fn zig_zag_memory_allocation_1() -> Result<(), String> { - let program = 
String::from( - r#" + fn zig_zag_memory_allocation_1() { + let program = r#" cell a = 1; cell b = 1; cell c = 1; @@ -3014,19 +2773,16 @@ cell f = 1; cell h = 1; cell i = 1; cell j = 1; - "#, +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_2D_ZIG_ZAG)).unwrap(), + "+>+<^+>>v+<^+<^+>>>vv+<^+<^+" ); - let desired_output = String::from("+>+<^+>>v+<^+<^+>>>vv+<^+<^+"); - - let code = compile_program(program, Some(&OPT_NONE_ZIG_ZAG))?.to_string(); - assert_eq!(desired_output, code); - - Ok(()) } + #[test] - fn zig_zag_memory_allocation_2() -> Result<(), String> { - let program = String::from( - r#" + fn zig_zag_memory_allocation_2() { + let program = r#" cell a = '1'; cell b = '2'; cell c = '3'; @@ -3045,38 +2801,33 @@ output f; output g; output h; output i; - "#, +"#; + let code = + compile_program::(program, Some(OPT_NONE_2D_ZIG_ZAG)).unwrap(); + println!("{code}"); + assert!(code.contains("v") || code.contains("^")); + assert_eq!( + run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), + "123456789" ); - let input = String::from(""); - let desired_output = String::from("123456789"); - - let code = compile_program(program, Some(&OPT_NONE_ZIG_ZAG))?.to_string(); - println!("{}", code); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - - Ok(()) } #[test] fn zig_zag_memory_allocation_3() { - let program = String::from( - r#" -cell a @2,4 = 1; -cell[4] b @0,4; -"#, + let program = r#" +cell a @(2, 4) = 1; +cell[4] b @(0, 4); +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_2D_ZIG_ZAG)) + .unwrap_err(), + "Location specifier @(0, 4) conflicts with another allocation" ); - let code = compile_program(program, Some(&OPT_NONE_ZIG_ZAG)); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); } #[test] - fn zig_zag_memory_allocation_4() -> Result<(), String> { - let program = String::from( - r#" + fn zig_zag_memory_allocation_4() { + let 
program = r#" cell a @2 = 1; cell[4] b; a = '5'; @@ -3089,20 +2840,17 @@ output b[1]; output b[2]; output b[3]; output a; -"#, - ); - let code = compile_program(program, Some(&OPT_NONE_ZIG_ZAG))?.to_string(); - println!("{}", code); - let input = String::from(""); - let desired_output = String::from("12345"); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - Ok(()) +"#; + let code = + compile_program::(program, Some(OPT_NONE_2D_ZIG_ZAG)).unwrap(); + println!("{code}"); + assert!(code.contains("v") || code.contains("^")); + assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); } #[test] - fn spiral_memory_allocation_1() -> Result<(), String> { - let program = String::from( - r#" + fn spiral_memory_allocation_1() { + let program = r#" cell a = 1; cell b = 1; cell c = 1; @@ -3112,19 +2860,15 @@ cell f = 1; cell h = 1; cell i = 1; cell j = 1; - "#, +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(), + "^+>+v+<+<+^+^+>+>+" ); - let desired_output = String::from("^+>+v+<+<+^+^+>+>+"); - - let code = compile_program(program, Some(&OPT_NONE_SPIRAL))?.to_string(); - assert_eq!(desired_output, code); - - Ok(()) } #[test] - fn spiral_memory_allocation_2() -> Result<(), String> { - let program = String::from( - r#" + fn spiral_memory_allocation_2() { + let program = r#" cell a = '1'; cell b = '2'; cell c = '3'; @@ -3143,38 +2887,33 @@ output f; output g; output h; output i; - "#, +"#; + let code = + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(); + println!("{code}"); + assert!(code.contains("v") || code.contains("^")); + assert_eq!( + run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), + "123456789" ); - let input = String::from(""); - let desired_output = String::from("123456789"); - - let code = compile_program(program, Some(&OPT_NONE_SPIRAL))?.to_string(); - println!("{}", code); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - - Ok(()) } + // TODO: decipher 
this #[test] fn spiral_memory_allocation_3() { - let program = String::from( - r#" -cell a @2,4 = 1; -cell[4] b @0,4; -"#, + let program = r#" +cell a @(2, 4) = 1; +cell[4] b @(0, 4); +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap_err(), + "Location specifier @(0, 4) conflicts with another allocation" ); - let code = compile_program(program, Some(&OPT_NONE_SPIRAL)); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); } #[test] - fn spiral_memory_allocation_4() -> Result<(), String> { - let program = String::from( - r#" + fn spiral_memory_allocation_4() { + let program = r#" cell a @2 = 1; cell[4] b; a = '5'; @@ -3187,13 +2926,11 @@ output b[1]; output b[2]; output b[3]; output a; -"#, - ); - let code = compile_program(program, Some(&OPT_NONE_SPIRAL))?.to_string(); - println!("{}", code); - let input = String::from(""); - let desired_output = String::from("12345"); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - Ok(()) +"#; + let code = + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(); + println!("{code}"); + assert!(code.contains("v") || code.contains("^")); + assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); } } diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs deleted file mode 100644 index 9bc72ce..0000000 --- a/compiler/src/tokeniser.rs +++ /dev/null @@ -1,226 +0,0 @@ -// TODO: refactor this tokeniser, needs some fixes and could be made simpler/cleaner - -use regex_lite::Regex; - -use crate::macros::macros::r_assert; - -pub fn tokenise(source: &String) -> Result, String> { - let stripped = source - .lines() - .map(strip_line) - .collect::>() - .join(" "); - - let mappings = [ - (" ", Token::None), - (";", Token::Semicolon), - ("output", Token::Output), - ("input", Token::Input), - // ("#debug", Token::Debug), - // ("let", Token::Let), - ("cell", 
Token::Cell), - ("struct", Token::Struct), - ("=", Token::EqualsSign), - ("while", Token::While), - ("drain", Token::Drain), - ("into", Token::Into), - // ("clear", Token::Clear), - // ("loop", Token::Loop), - ("else", Token::Else), - ("copy", Token::Copy), - ("bf", Token::Bf), - ("clobbers", Token::Clobbers), - ("assert", Token::Assert), - ("equals", Token::Equals), - ("unknown", Token::Unknown), - // ("call", Token::Call), - // ("bool", Token::Bool), - // ("free", Token::Free), - // ("push", Token::Push), - // ("deal", Token::Deal), - // ("def", Token::Def), - ("fn", Token::Fn), - // ("int", Token::Int), - // ("add", Token::Add), - // ("sub", Token::Sub), - // ("pop", Token::Pop), - ("if", Token::If), - ("not", Token::Not), - ("else", Token::Else), - ("{", Token::OpenBrace), - ("}", Token::ClosingBrace), - ("[", Token::OpenSquareBracket), - ("]", Token::ClosingSquareBracket), - ("(", Token::OpenParenthesis), - (")", Token::ClosingParenthesis), - ("<", Token::LessThan), - (">", Token::MoreThan), - ("^", Token::UpToken), - ("true", Token::True), - ("false", Token::False), - (",", Token::Comma), - (".", Token::Dot), - ("*", Token::Asterisk), - ("@", Token::At), - ("-", Token::Minus), - ("+", Token::Plus), - ]; - // check for numbers and variables - let num_re = Regex::new(r#"^[0-9]+"#).unwrap(); - let name_re = Regex::new(r#"^[a-zA-Z_]\w*"#).unwrap(); - // string regex taken from chatgpt - let str_re = Regex::new(r#"^"(?:[^"\\]|\\.)+""#).unwrap(); - // char regex taken from chatgpt again - let chr_re = Regex::new(r#"^'(?:[^'\\]|\\.)'"#).unwrap(); - - let mut tokens: Vec = Vec::new(); - - let mut chr_idx = 0usize; - while chr_idx < stripped.len() { - let remaining = &stripped[chr_idx..]; - - let mut found = false; - - ///////// - if let Some(num_capture) = num_re.captures(remaining) { - found = true; - let substring = String::from(&num_capture[0]); - chr_idx += substring.len(); - tokens.push(Token::Digits(substring)); - } else if let Some(name_capture) = 
name_re.captures(remaining) { - found = true; - let substring = String::from(&name_capture[0]); - if mappings - .iter() - // this could be made more efficient if we had a table of keywords vs symbols - .find(|(keyword, _)| substring == *keyword) - .is_some() - { - found = false; - } else { - chr_idx += substring.len(); - tokens.push(Token::Name(substring)); - } - } else if let Some(str_capture) = str_re.captures(remaining) { - found = true; - let substring = String::from(&str_capture[0]); - // not the most efficient way, this simply removes the quote characters - // could refactor this - chr_idx += substring.len(); - let unescaped: String = serde_json::from_str(&substring) - .or(Err("Could not unescape string literal in tokenisation due to serde error, this should never occur."))?; - tokens.push(Token::String(unescaped)); - } else if let Some(chr_capture) = chr_re.captures(remaining) { - found = true; - let chr_literal = String::from(&chr_capture[0]); - // see above - chr_idx += chr_literal.len(); - // this code sucks, TODO: refactor - // make a new double-quoted string because serde json doesn't like single quotes and I can't be bothered making my own unescaping function - let escaped_string = - String::new() + "\"" + &chr_literal[1..(chr_literal.len() - 1)] + "\""; - let unescaped: String = serde_json::from_str(&escaped_string) - .or(Err("Could not unescape character literal in tokenisation due to serde error, this should never occur."))?; - // might need to change this for escaped characters (TODO) - r_assert!(unescaped.len() == 1, "Character literals must be length 1"); - tokens.push(Token::Character(unescaped.chars().next().unwrap())); - } - ///////// - - if !found { - for (text, token) in mappings.iter() { - if remaining.starts_with(*text) { - tokens.push(token.clone()); - chr_idx += (*text).len(); - found = true; - break; - } - } - } - r_assert!( - found, - "Unknown token found while tokenising program: \"{remaining}\"" - ); - } - - Ok(tokens - .into_iter() 
- .filter(|t| match t { - Token::None => false, - _ => true, - }) - // stick a None token on the end to fix some weird parsing errors (seems silly but why not?) - .chain([Token::None]) - .collect()) -} - -fn strip_line(line: &str) -> String { - let mut stripped = line; - // remove comments - let split = line.split_once("//"); - if let Some((one, _comment)) = split { - stripped = one; - } - - // remove excess whitespace - stripped - .trim() - .split_whitespace() - .collect::>() - .join(" ") -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Token { - None, - Output, - Input, - // Def, - Fn, - // Let, - Cell, - Struct, - // Assert, - // Free, - While, - If, - Not, - Else, - // Loop, - // Break, - OpenBrace, - ClosingBrace, - OpenSquareBracket, - ClosingSquareBracket, - OpenParenthesis, - ClosingParenthesis, - LessThan, - MoreThan, - Comma, - Dot, - Asterisk, - At, - Copy, - Drain, - Into, - Bf, - Clobbers, - Assert, - Equals, - Unknown, - // Push, - // Pop, - // Deal, - // Debug, - Name(String), - Digits(String), - String(String), - Character(char), - True, - False, - Minus, - Plus, - EqualsSign, - Semicolon, - UpToken, -} diff --git a/devguide.md b/devguide.md new file mode 100644 index 0000000..a77ccba --- /dev/null +++ b/devguide.md @@ -0,0 +1,54 @@ +## Mastermind Development and Setup + +### Quickstart: + +- Install Rust/Cargo and Node/NPM. +- Install Yarn: `npm i --global yarn`. +- Run `yarn`. +- Run `yarn build:wasm`. +- Run `yarn build:grammar`. +- Run `yarn dev`, then follow the link to http://localhost:5173. + +Pushes to _dev_ and _main_ are published to https://staging.mastermind.lostpixels.org and https://mastermind.lostpixels.org respectively. + +### Overview: + +This repository contains two main components: the compiler and the web IDE. There are GitHub Actions workflows which build, test, and deploy the web IDE (with bundled compiler) to Firebase Web Hosting. 
+ +#### Compiler + +The `./compiler` subdirectory contains a Cargo (Rust) package; ensure Rust is installed. + +The compiler codebase has two main entrypoints: `main.rs` and `lib.rs`, for the command-line and WASM compilation targets respectively. All other Rust source files are common between compilation targets. + +Key files to look at: + +- `tokeniser.rs`: tokenises the raw text files into Mastermind syntax tokens. +- `parser.rs`: parses strings of tokens into higher-level Mastermind clauses. +- `compiler.rs`: compiles the high-level clauses into a list of basic instructions akin to an intermediate representation (IR). +- `builder.rs`: takes the basic instructions from the compiler and builds the final Brainfuck program. + +Some key commands: + +(from within the `./compiler` subdirectory) + +- `cargo run -- -h`: runs the command-line compiler module and displays command help information +- `cargo test`: runs the automated test suite +- `cargo build`: builds the command-line module +- `wasm-pack build`: builds the WASM module + +#### Web IDE + +The project root directory `package.json`/`yarn.lock` defines a Node package managed with Yarn. Most important commands or behaviours are defined as `npm run` or `yarn` scripts within `package.json`. + +Ensure Node is installed, then ensure Yarn is installed with `npm i --global yarn`. + +The web IDE is a SolidJS app using TypeScript/TSX, and Vite as a bundler. The text editing portions of the UI are provided by the _codemirror_ plugin, and syntax highlighting is defined in the included _lezer_ grammar: `./src/lexer/mastermind.grammar`.
+ +Some key commands: + +- `yarn`: installs npm packages +- `yarn build:wasm`: builds the compiler WASM module +- `yarn build:grammar`: compiles the lezer grammar to JS for use in codemirror +- `yarn dev`: runs the SolidJS app in a local Vite dev server +- `yarn build`: builds the SolidJS app diff --git a/docs/brainfuck.md b/docs/brainfuck.md deleted file mode 100644 index cb2fbce..0000000 --- a/docs/brainfuck.md +++ /dev/null @@ -1,34 +0,0 @@ -### Brainfuck - -Brainfuck is an esoteric programming language, originally designed as a theoretical example of a Turing complete language with an extremely minimal compiler. The name is due to its difficulty, it is significantly more difficult to create complex programs than in any popular modern language. - -### Specification - -When a Brainfuck program is run, it operates on a array/tape of cells, performing operations on the tape. Each cell contains an integer, initialised to 0 by default. The program operates on one cell at a time based on the position of a "tape head". Brainfuck supports the following operations: - -- `+`: increments the value of the current cell -- `-`: decrement the value of the current cell -- `>`: move the tape head one cell to the right -- `<`: move the tape head one cell to the left -- `.`: output the current cell as a byte to stdout -- `,`: input a byte from stdin, overwriting the current cell -- `[`: jump to the corresponding `]` if the current cell is 0 -- `]`: jump to the corresponding `[` if the current cell is not 0 - -A Brainfuck program consists of a list of these commands, which are executed sequentially. The program terminates if the final operation in the list is executed. - -### Interpreter Implementation Details - -The Mastermind IDE and compiler library contains an implementation of a Brainfuck interpreter. 
This implementation is intended to match the behaviour of the most popular Brainfuck implementations: - -#### 8-bit Wrapping Cells - -In this implementation, each cell is an 8-bit integer that wraps if an increment or decrement operation overflows or underflows. - -E.g. given the current tape cell value is `255`, after an increment (`+`), the cell value is now `0`. - -Similarly: `0`, after a decrement (`-`) becomes `255` - -#### Infinite Bidirectional Tape - -In this implementation, the tape extends infinitely in both directions. diff --git a/docs/conditionals.md b/docs/conditionals.md deleted file mode 100644 index 664d54b..0000000 --- a/docs/conditionals.md +++ /dev/null @@ -1,27 +0,0 @@ -### Conditionals - -Mastermind supports basic `if`/`else` statements. An `if` statement takes in a single cell expression, if the expression is evaluated to be truthy, then the `if` block is executed, otherwise the optional `else` block is executed. This behaviour can be inverted using the `not` keyword. - -``` -if 13 { - output "13"; -} - -if not true { - // unreachable -} - -cell var = 4; -if var { - output "true"; -} else { - output "false"; -} - -// typical equivalence use-case: -if not var - 10 { - // == -} else { - // != -} -``` diff --git a/docs/functions.md b/docs/functions.md deleted file mode 100644 index a1378e6..0000000 --- a/docs/functions.md +++ /dev/null @@ -1,99 +0,0 @@ -### Functions - -Mastermind supports a minimal functions system: Functions can be defined with a name and a fixed number of typed arguments. - -``` -fn newline() { output '\n'; } - -fn print_zeros(cell num) { - copy num { - output '0'; - } - newline(); -} - -// expressions as arguments are currently not supported, -// i.e. print_zeros(9) -cell g = 9; -print_zeros(g); -``` - -Functions are in-lined at compile-time, and all arguments are passed by reference. Values can be returned by editing the arguments, or editing variables in an outer scope, although the latter makes a function less portable. 
- -``` -fn is_zero(cell in, cell out) { - out = true; - if in { - out = false; - } -} - -cell value = 'h'; -cell falsy; -is_zero(value, falsy); -``` - -Example showing a function reading a variable from an outer scope: - -``` -fn print_global_g(cell count) { - copy count { - output g; - output ' '; - } -} - -cell g = 'g'; -cell count = 11; -print_global_g(count); -// g g g g g g g g g g g - -{ - // inner scope with a new 'g' allocation - cell g = 'G'; - count = 4; - print_global_g(count); - // G G G G -} - -// same call again, now the inner 'G' has been freed -print_global_g(count); -// g g g g -``` - -#### Structs and Overloads - -Example of supported behaviour: - -``` -fn func1() { - output '1'; -} -fn func1(cell a) { - output '2'; -} -fn func1(cell a, cell b) { - output '3'; -} -struct X { cell a; } -fn func1(struct X x) { - output '4'; -} -struct Y { cell a; } -fn func1(struct Y y) { - output '5'; -} -fn func1(cell a, struct X x, struct Y y) { - output '6'; -} -cell n; -struct X x; -struct Y y; -func1(); -func1(n); -func1(n, n); -func1(x); -func1(y); -func1(n, x, y); -// 123456 -``` diff --git a/docs/inlinebrainfuck.md b/docs/inlinebrainfuck.md deleted file mode 100644 index b2cea17..0000000 --- a/docs/inlinebrainfuck.md +++ /dev/null @@ -1,155 +0,0 @@ -### In-line Brainfuck - -In-line Brainfuck allows the programmer to define custom behaviour as if writing raw Brainfuck, much in the same way as C has in-line assembly syntax. - -``` -// This is its most basic form: -// find the next cell that equals -1 -bf { - +[->+]- -} - -// This is its more advanced form: -// input a line of lowercase letters and output the uppercase version -// this is an intentionally inefficient example -bf @3 clobbers var *spread_var etc { - ,----------[++++++++++>,----------] - <[<]> - [ - { - cell g @0; - assert g unknown; - output g + ('A' - 'a'); - // embedded Mastermind! 
- } - > - ] - // now clear and return - <[[-]<]> -} -``` - -It is the programmer's responsibility to clear used cells and return back to the cell in which they started the in-line Brainfuck context. If the programmer does not do this, any mastermind code after the in-line Brainfuck command will likely break. - -#### Memory location specifiers - -For hand-tuning optimisations and in-line Brainfuck that reads from Mastermind variables, you can specify the location on the Brainfuck tape: - -``` -cell var @3 = 4; -// compiled: >>>++++ - -bf @4 { - <><><> -} -// compiled: >>>><><><> -``` - -Alternatively if using the 2D grid you can use a comma seperated list with a second value: - -``` - -bf @4,3 { - <><><> -} -// compiled: >>>>^^^<><><> -``` - -#### Clobbering and Assertions - -Mastermind will try to predict the value of cells at compile-time, so it can prevent unnecessary cell clean-ups and unreachable code (with optimisations turned on). If your in-line Brainfuck affects existing Mastermind variables, you should tell the compiler using the `clobbers` keyword, the syntax is similar to the `drain into` list: - -``` -bf clobbers var *spread_var other_var etc {} -``` - -The compiler will now assume nothing about the values of those variables afterwards. - -If instead you want to tell the compiler specifically that a variable has become a certain value, you can use `assert`: - -``` -assert var equals 3; -// most common use cases: -assert var equals 0; -assert var unknown; -``` - -Asserting a variable as `unknown` is equivalent to clobbering. - -#### Embedded Mastermind - -You can embed high-level Mastermind code within a Brainfuck context, this allows you to control precisely what the generated Brainfuck code is doing, whilst also taking advantage of the syntax features of Mastermind. 
- -``` -cell sum @0; - -bf @0 { - >> - // read input (until eof) to the tape, nullifying any spaces or newlines - // (this is probably not a good practical example, ideas are appreciated) - ,[ - { - cell c @0; - assert c unknown; // needed otherwise the compiler assumes c = 0 - - if not (c - '\n') { - c = 0; - } - if not (c - ' ') { - c = 0; - } - } - >, - ] -} -``` - -The compiler cannot guarantee the global head position at compile time within an in-line Brainfuck context. Therefore memory location specifiers are relative to the current embedded Mastermind context, not the entire program. - -Also, top-level variables are not cleared by default in Mastermind contexts, this allows you to "leave" variables in cells for your Brainfuck to use. If you want variables in your embedded Mastermind to be automatically cleared, you can open a scope at the top level: - -``` -bf { - ++----++[][][<><><>] // the program doesn't matter for this example - { - // variables here will not be cleared - cell g @2; - assert g unknown; - { - // variables here will be cleared - let b = 32; - } - } - {{ - // self-cleaning Mastermind code here - }} -} -``` - -#### Craziness - -You can put in-line Brainfuck inside your embedded Mastermind. - -``` -bf { - ++++[ - { - cell i @0; - assert i unknown; - cell j @1 = i + 1; - - bf @1 { - [.+] - { - // even more layers are possible - bf { - { - output "h" - } - } - } - } - } - -] -} -``` diff --git a/docs/intro.md b/docs/intro.md deleted file mode 100644 index 29aeb35..0000000 --- a/docs/intro.md +++ /dev/null @@ -1,19 +0,0 @@ -Mastermind is a programming language designed to compile to the well-known esoteric language "Brainfuck". - -Brainfuck is essentially a modern interpretation of the classical Turing machine. It consists of a tape of 8-bit values, with simple increment/decrement, move left/right, input/output, and looping operations. The full language only uses 8 control characters: `+-><.,[]`. 
- -Imagine if C was designed for computer architectures that run Brainfuck, that is what Mastermind is intended to be. - -## Contents - -This documentation currently includes the following articles: - -- Brainfuck -- Variables -- Conditionals -- Loops -- Functions -- Inline Brainfuck -- Standard Library -- 2D Mastermind -- Optimisations diff --git a/docs/loops.md b/docs/loops.md deleted file mode 100644 index adbf9c9..0000000 --- a/docs/loops.md +++ /dev/null @@ -1,106 +0,0 @@ -Looping in Mastermind has 3 main forms. These are the: - -- While Loop -- Drain Loop -- Copy Loop - -all 3 looping styles are essentially variations of a while loop - -## While Loop - -The simplest is the `while` loop, which only supports cell references, currently not expressions: - -``` -while var { - //do stuff - var -= 1; - //etc -} -``` - -## Drain Loop - -The `drain` loop is a form of syntax sugar for a self decrementing while loop. This form of loop is extremely common in Brainfuck -so it has been shortened with this syntax - -``` -drain var { - // do stuff -} -``` - -shorthand for the following: - -``` -while var { - // do stuff - var -= 1; -} -``` - -This destructively loops as many times as the value in the cell being referenced, this can be used with expressions: - -drain 10 {} - -drain var - 6 {} - -Drain additionally supports the ability to add a variable `into` multiple other variables - -``` -drain var into other_var other_var_2 *spread_array etc; -``` - -Equivalent to: - -``` -drain var { - other_var += 1; - other_var_2 += 1; - spread_array[0] += 1; - spread_array[1] += 1; - spread_array[2] += 1; - // ... 
-} - -// example of typical "for loop": -cell i; -drain 10 into i { - output '0' + i; // inefficient for the example -} -// "0123456789" -// equivalent to the following: -cell i = 0; -cell N = 10; -while N { - output '0' + i; - i += 1; - N -= 1; -} -``` - -## Copy Loop - -The `copy` loop is similar to a `drain` loop however it is designed to preserve the initial state of the loop variable. -A copy loop is shorthand designed to replace the usage of a temporary variable in a drain loop. - -``` -copy var { - // do stuff -} -``` - -Equivalent to: - -``` -cell temp = var; -while temp { - // do stuff - temp -= 1; -} -``` - -You can also `copy into` multiple other variables, similar to the `drain` loop: - -``` -copy var into other_var other_var_2 *spread_array etc; -``` diff --git a/docs/optimisations.md b/docs/optimisations.md deleted file mode 100644 index 716060c..0000000 --- a/docs/optimisations.md +++ /dev/null @@ -1,72 +0,0 @@ -### Optimisations - -The optimisations in the Mastermind compiler are aimed at reducing the compiled Brainfuck code length, not necessarily execution speed. This is due to the original goal of the project: Code Golf in Brainfuck. - -#### Cell Clearing - -This optimises the clearing of cells by tracking their values at compile-time. For instance, if a cell can be proven at compile-time to have the value `2`, it is more efficient to clear with `--`, than the typical Brainfuck clear: `[-]`. - -#### Constants - -When large values are added in Brainfuck, the naive approach is to use the increment `-` operator for as many times as needed. The constants optimiser will use multiplication to shorten the code needed to add/subtract large values. Example: the value `45` can be achieved by either `+++++++++++++++++++++++++++++++++++++++++++++` or the shorter: `+++++[<+++++++++>-]>`. - -#### Empty Blocks - -This detects if a code block is empty, and does not compile the clause associated. 
This is helpful for `if` statements and `copy` loops especially, as those can imply extra overhead for copying cells. - -#### Generated Code - -This is a final pass optimisation that operates directly on Brainfuck code, optimising subsets of programs which can be shortened while still guaranteeing equivalent behaviour. Example: - -``` ---->>><<<++ -``` - -Is equivalent to: - -``` -- -``` - -It is difficult to analyse the behaviour of a Brainfuck program at compile time, so this optimiser is limited to subsets of a program's operations between I/O operations and loops (with exception). Example: - -``` -cell h = 4; -cell j = 3; - -h += 10; - -drain 10 { - j = 5; - h += 4; - j += 1; -} -``` - -Compiles to: - -``` -++++>+++<++++++++++>>++++++++++[<+<++++>[-]+++++>-] -``` - -After optimisation: - -``` -++++++++++++++>+++>++++++++++[-<[-]+++++<++++>>] -``` - -For the 2D compiler extensions, this system can use an exhaustive search to determine the least movement between cells. This could become slow depending on the project, so it can be configured to use a greedy approach. This is done via the _Generated Code Permutations_ setting in the web IDE. - -#### Unreachable Loops - -If a cell is known to have a value of `0` at compile time, and that cell is used to open a Brainfuck loop, then that entire loop is omitted. This is implemented at a low level, so it is agnostic of the syntactic structure that it is optimising, i.e `if`, `while`, `drain`. - -### Unimplemented Optimisations - -#### Memory Allocations - -The goal of this is to optimise placing variables in tape memory to minimise movement between them. - -#### Variable Usage - -The goal of this is to automatically change the order of variable allocations/frees to ensure tape memory is allocated for the smallest amount of execution steps possible. This would allow allocation to be more efficient, as cells can be allocated which would otherwise be taken by variables that are not in use. 
diff --git a/docs/standardlib.md b/docs/standardlib.md deleted file mode 100644 index 5792084..0000000 --- a/docs/standardlib.md +++ /dev/null @@ -1,77 +0,0 @@ -### Mastermind Standard Library - -Currently the Mastermind standard library is very limited, and is effectively a set of example programs included in the web IDE and source repository. - -#### Including files - -You can include/import other files using preprocessor directives. The Mastermind preprocessor is intended to mirror the C preprocessor, however it currently only supports the `#include` directive. - -The following is a basic example: - -``` -// file1.mmi -struct H { - cell a; -} -fn print(struct H h) { - output h.a; -} -``` - -``` -// main file being compiled -#include "file1.mmi" - -struct H h; -h.a = 64; -print(h); -// @ -``` - -#### Standard Library Examples - -The most mature files in the included examples are the following: - -- `bitops`: bitshifting operations for cell types -- `i8`: signed type for 8-bit integers and supporting functions -- `u8`: common supporting functions for cell types -- `u16`: a 16-bit unsigned integer type and supporting functions -- `ifp16`: a signed 16-bit fixed-point number type and supporting functions - -NOTE: due to current lack of header-guard support, importing multiple of these will likely cause a compiler error, until this is implemented, the best way to work around this is to only include `ifp16` as that includes the others. 
- -Example usage: - -``` -#include - -// read a 16 bit number from stdin, add 55, then print - -struct u16 n; -read(n); - -cell ff = 55; -add(n, ff); -print(n); -output ' '; -debug(n); // print the binary representation -// example input: 16000 -// output: 16055 0011111010110111 -``` - -Example fixed-point usage: - -``` -#include - -struct ifp16 n; -_99p99609375(n); // constant 99.99609375 -struct ifp16 m; -__1p5(m); // constant -1.5 - -divide(n, m); -print(n); -output ' '; -debug(n); -// -66.66 10111101.01010110 -``` diff --git a/docs/twodimensional.md b/docs/twodimensional.md deleted file mode 100644 index bf75f72..0000000 --- a/docs/twodimensional.md +++ /dev/null @@ -1,57 +0,0 @@ -### Two-Dimensional Brainfuck - -Two-dimensional Brainfuck is an extension which provides an additional dimension to the memory tape. - -To support this, two new operations have been added to this extended version of the language: - -- `^`: move up one cell on the grid -- `v`: move down one cell on the grid - -#### Using 2D Brainfuck in Mastermind - -This behaviour must be enabled in the included Brainfuck interpreter. In the web IDE this is done via the settings modal. - -When this setting is enabled in isolation, the compiler will still generate typical 1D Brainfuck code. To make the compiler use multiple dimensions you must either: - -- Use a 2D-specific memory allocation algorithm -- Use a 2D location specifier on a variable -- Use in-line Brainfuck with 2D instructions - -### Memory Allocation Algorithms - -There are currently four allocation strategies implemented (including the original 1D). - -#### 1D Mastermind - -_1D Mastermind_ allocates the closest free cells to the right of the origin. - -#### 2D Mastermind - Zig Zag - -_2D Mastermind - Zig Zag_ treats the memory as a grid and fills in values from x 0 and some y value diagonally until it reaches y 0 and the same x value as the starting y. 
The table below shows the order that this is populated - -| 7 | | | | -| --- | --- | --- | --- | -| 4 | 8 | | | -| 2 | 5 | 9 | | -| 1 | 3 | 6 | 10 | - -#### 2D Mastermind - Spiral - -_2D Mastermind - Spiral_ starts from 0,0 and move in a Spiral such that each subsequent memory -value is only 1 step away from the last. This means that it will start by filling a 2x2 grid then from the bottom corner of -that grid it will iterate around that 2x2 filling a 4x4 area - -| 10 | 11 | 12 | -| --- | --- | --- | -| 9 | 2 | 3 | -| 8 | 1 | 4 | -| 7 | 6 | 5 | - -#### 2D Mastermind - Tiles - -_2D Mastermind - Tiles_ allocates a tile of memory and check all cells in that area before expanding to check new cells. This algorithm starts at 0,0 with a 1x1 area then will move down to -1, -1 and check a new 3x3 area it will check each area column by column from the bottom row up so (-1, -1), (0, -1), (1, -1), (-1, 0)... - -| 4 | 6 | 9 | -| --- | --- | --- | -| 3 | 1 | 8 | -| 2 | 5 | 7 | diff --git a/docs/variables.md b/docs/variables.md deleted file mode 100644 index 5ca056d..0000000 --- a/docs/variables.md +++ /dev/null @@ -1,148 +0,0 @@ -### Variables - -#### Cells - -The base data type in Mastermind is the `cell`, this corresponds to a a single 8-bit cell on the Brainfuck tape. - -``` -cell var = 56; -cell c = 'g'; -cell bool = true; // true/false equivalent to 1/0 -``` - -#### Input/Output - -The `input` and `output` keywords in Mastermind correspond to the `,` and `.` operators in Brainfuck. `input` simply inputs the next byte from stdin, and `output` outputs a byte to stdout. - -``` -// stdin: 00abc -cell g; -drain 5 { - // do this 5 times - input g; - g += 1; - output g; -} -// stdout: 11bcd -``` - -The simplest way to display text is to output valid ASCII characters, however if your Brainfuck implementation supports unicode, that is also possible by outputting multiple bytes. 
- -``` -output 240; -output 159; -output 164; -output 145; -output 10; -// displays 🤑 (emoji with green cash for tongue) -``` - -#### Cell Arrays - -Variables can also be defined as contiguous arrays of cells. - -``` -// multi-cell: -cell[4] array_example = [1, 2, 3, 4]; -cell[5] string_example = "hello"; -cell[2] foo; -foo[0] = 204; -``` - -#### Structs - -Structure types can be defined with named fields, then instantiated as variables. - -``` -struct struct_name { - cell x; - cell y; - cell[5] zzzzz; -} - -struct struct_name s; -s.x = 4; -s.y = 123; -s.zzzzz[0] += 3; -s.zzzzz[4] = 180; - -// nested struct: -struct Nested { - struct struct_name n; -} -``` - -### Structs and Arrays - -Any type can be repeated into an array/contiguous allocation. This includes cells, structs, arrays of cells, and arrays of structs. - -``` -cell[4][6] mult_arr; // a 6-length array of cell[4] arrays -cell[4][6][2] mult_arr; // 2 * (6-length arrays of cell[4] arrays) - -struct T { - cell a; - cell[4][2] b; -} - -struct T[10] ten_T_structs; -ten_T_structs[4].b[1][3] = 45; - -struct S { - struct T[2][4] matrix_of_T_structs; - cell other; -} - -struct S[3] three_S_structs; -three_S_structs[1].matrix_of_T_structs[3][0] = '5'; -``` - -#### Note: Array indices must be compile-time constant integers - -This is a limitation of Brainfuck, getting around this problem requires more runtime code is worth including for the sake of optimisations. You can implement equivalent behaviour using in-line Brainfuck, structs, and functions. 
- -### Location specifiers - -The exact memory cells occupied by a variable can be specified: - -``` -cell a @4 = 1; // value 1 at tape position 4 -``` - -#### Struct subfields - -The byte-order and positioning of a struct's subfields can be specified: - -``` -struct T { - cell a @1; - cell b[2] @3; -} -// struct T's layout: -// (-, a, -, b[0], b[1]) -// '-' denotes an untracked padding cell -``` - -#### Variable - -When using in-line Brainfuck (see other document), the Brainfuck scope's starting position can be specified with variables: - -``` -cell d; -bf @d { - // brainfuck code here -} - -struct G { - cell h; - cell i; - cell j; -} -struct G g; - -bf @g { - // starts on the first cell of g's allocation -} -// equivalent to: -bf @g.h {} -``` diff --git a/load_env.sh b/load_env.sh new file mode 100755 index 0000000..429e3ec --- /dev/null +++ b/load_env.sh @@ -0,0 +1,2 @@ +echo "VITE_GIT_COMMIT_HASH=$(git rev-parse --short HEAD)" > .env +echo "VITE_GIT_COMMIT_BRANCH=$(git branch --show-current)" >> .env diff --git a/package.json b/package.json index c620392..7fd3cfc 100644 --- a/package.json +++ b/package.json @@ -5,10 +5,10 @@ "type": "module", "scripts": { "test:wasm": "cd compiler && cargo test", - "dev": "vite", + "dev": "./load_env.sh && vite", "build:grammar": "yarn run lezer-generator src/lexer/mastermind.grammar -o src/lexer/mastermind_parser.js", "build:wasm": "cd compiler && wasm-pack build --target web", - "build": "tsc && vite build", + "build": "./load_env.sh && tsc && vite build", "profile:wasm": "yarn build:wasm --profiling && yarn build && twiggy top -n 20 compiler/pkg/mastermind_bg.wasm", "preview": "vite preview" }, @@ -30,6 +30,7 @@ }, "devDependencies": { "@lezer/generator": "^1.5.1", + "@types/node": "^24.10.1", "@types/uuid": "^9.0.7", "typescript": "^5.2.2", "vite": "^5.0.0", diff --git a/programs/examples/brainfuck.mmi b/programs/examples/brainfuck.mmi index 3996d70..23a6737 100644 --- a/programs/examples/brainfuck.mmi +++ 
b/programs/examples/brainfuck.mmi @@ -1,5 +1,8 @@ // Accepts a brainfuck program as input, then runs it +// NOTE: you will have to disable blocking input in the web IDE, +// otherwise the program will hang waiting for input indefinitely + // frames: // @0: bf_instr_marker // @1: bf_instr diff --git a/programs/std/stack b/programs/std/stack new file mode 100644 index 0000000..9cc7298 --- /dev/null +++ b/programs/std/stack @@ -0,0 +1,266 @@ +// Credit to @MSMissing for contributing this stack data type. + +// Be aware this data type uses embedded Brainfuck operations which may break user programs if not used carefully. +// The standard library currently lacks a testing framework, so until that is added, use at your own risk :) + + +struct stack32 { // STACK CANNOT CONTAIN ZEROES. + cell len @0; + cell temp @1; + cell zero @2; + cell[32] content @3; + cell end @35; // if this is not equal to zero, panic. +} + +// DOCUMENTATION + +// struct stack32 - 32-cell stack; uses 36 cells (offsets 0 to 35). + +// push_d(stack, value) - push a value destructively to the stack. +// push(stack, value) - push a value to the stack. slower than push_d +// unshift_d(stack, value) - push a value destructively into the bottom of the stack. +// unshift(stack, value) - push a value into the bottom of the stack. + +// pop(stack, *output) - pop a value from the stack. +// shift(stack, *output) - shift a value from the bottom of the stack. +// shift is faster than pop. + +// reverse(stack) - reverses the stack. +// clear(stack) - clears the stack. Suggested if a stack is about to go out of scope.
+ + +fn __zero_to_top() { + bf { + >[>]< + } +} + +fn __top_to_zero() { + bf { + [<] + } +} + +fn push_d(struct stack32 stack, cell value) { + value -= 1; + stack.len += 1; + bf @stack.zero clobbers *stack.content { + {__zero_to_top();} + >+ + {__top_to_zero();} + } + + while value { + value -= 1; + bf @stack.zero clobbers *stack.content { + {__zero_to_top();} + + + {__top_to_zero();} + } + } +} + +fn push(struct stack32 stack, cell value) { + cell _value = value; + push_d(stack, _value); +} + + +fn pop(struct stack32 stack, cell out) { + stack.len -= 1; + bf @stack.zero clobbers *stack.content stack.temp { + {__zero_to_top();} + [>+<-]>[ + -<<[<] + { + cell temp @-1; + assert temp unknown; + temp += 1; + } + >[>]> + ] + <<{__top_to_zero();} + } + drain stack.temp into out; +} + +fn pop(struct stack32 stack) { + stack.len -= 1; + bf @stack.zero clobbers *stack.content { + {__zero_to_top();} + [-]< + {__top_to_zero();} + } +} + +fn shift(struct stack32 stack, cell out) { + out = 0; + stack.len -= 1; + drain stack.content[0] into out; + bf @stack.content[1] clobbers *stack.content { [[<+>-]>]<<[<]>> } +} + +fn shift(struct stack32 stack) { + stack.len -= 1; + drain stack.content[0]; + bf @stack.content[1] clobbers *stack.content { [[<+>-]>]<<[<]>> } +} + +fn unshift_d(struct stack32 stack, cell value) { + stack.len += 1; + bf @stack.zero clobbers *stack.content { + {__zero_to_top();} + [[->+<]<] + } + assert stack.content[0] equals 0; + drain value into stack.content[0]; +} + +fn unshift(struct stack32 stack, cell value) { + cell _value = value; // copy first: unshift_d drains its argument, and the caller's value must survive + unshift_d(stack, _value); +} + +fn clear(struct stack32 stack) { + while stack.len { + pop(stack); + } + assert *stack.content equals 0; +} + +fn move_stack(struct stack32 in, struct stack32 out) { // out must be empty + cell x; + while in.len { + shift(in, x); + push_d(out, x); + } + assert *in.content equals 0; +} + +fn copy_stack(struct stack32 src, struct stack32 out) { + cell x; + struct stack32 _src; + while src.len { 
+ shift(src, x); + push(out, x); + push_d(_src, x); + } + while _src.len { + shift(_src, x); + push_d(src, x); + } + assert *_src.content equals 0; +} + +fn reverse(struct stack32 stack, struct stack32 out) { // out must be empty + cell x; + while stack.len { + pop(stack, x); + push_d(out, x); + } + assert *stack.content equals 0; +} + +fn reverse(struct stack32 stack) { + struct stack32 out; + reverse(stack, out); + move_stack(out, stack); +} + + +// debug/test code: +// #include +// fn p(cell x) {print(x); output " ("; debug(x); output ")\n";} +// fn p(struct stack32 s) { +// output "STACK:\n"; +// output "len = "; p(s.len); +// output "temp = "; p(s.temp); +// output "zero = "; p(s.zero); +// output "content[0] = "; p(s.content[0]); +// output "content[1] = "; p(s.content[1]); +// output "content[2] = "; p(s.content[2]); +// output "content[3] = "; p(s.content[3]); +// output "content[4] = "; p(s.content[4]); +// output "\n"; +// } + +// // pushing and popping: +// output "PUSH/POP TESTING START\n"; +// struct stack32 s1; +// cell x = 6; +// p(s1); +// push_d(s1, x); +// x += 7; +// p(s1); +// push(s1, x); +// p(s1); + +// push(s1, x); +// p(s1); +// pop(s1); +// p(s1); + +// cell y; +// p(y); +// pop(s1, y); +// p(y); +// p(s1); +// pop(s1, y); +// p(y); +// p(s1); + +// output "PUSH/POP TESTING END\n\n"; + + +// // unshifting and shifting: +// output "UNSHIFT/SHIFT TESTING START\n"; +// struct stack32 s2; +// x = 6; +// p(s2); +// unshift_d(s2, x); +// x += 7; +// p(s2); +// unshift(s2, x); +// p(s2); + +// x = 255; +// unshift(s2, x); +// p(s2); + +// struct stack32 snapshot1; +// copy_stack(s2, snapshot1); +// struct stack32 snapshot2; +// copy_stack(s2, snapshot2); + +// shift(s2); +// p(s2); + +// y = 0; +// p(y); +// shift(s2, y); +// p(y); +// p(s2); +// shift(s2, y); +// p(y); +// p(s2); + +// output "SNAPSHOTS:\n"; +// p(snapshot1); +// p(snapshot2); + +// move_stack(snapshot1, s2); +// p(s2); +// pop(s2); +// move_stack(s2, snapshot1); +// 
copy_stack(snapshot2, s2); +// shift(s2); +// reverse(snapshot1); + +// output "s2:\n"; +// p(s2); +// output "snapshot1:\n"; +// p(snapshot1); +// output "snapshot2:\n"; +// p(snapshot2); + +// output "UNSHIFT/SHIFT TESTING END\n\n"; diff --git a/reference.md b/reference.md new file mode 100644 index 0000000..66ea956 --- /dev/null +++ b/reference.md @@ -0,0 +1,789 @@ +# Mastermind reference + +## Introduction + +Mastermind is a programming language designed to compile to the well-known esoteric language "Brainfuck". + +Brainfuck is essentially a modern interpretation of the classical Turing machine. It consists of a tape of 8-bit values, with simple increment/decrement, move left/right, input/output, and looping operations. The full language only uses 8 control characters: `+-><.,[]`. + +Imagine an alternate reality where C was designed for computer architectures that run Brainfuck natively, that is what Mastermind is intended to be. + +## Contents + +- [Brainfuck](#brainfuck) +- [Variables](#variables) +- [Conditionals](#conditionals) +- [Loops](#loops) +- [Functions](#functions) +- [Inline Brainfuck](#in-line-brainfuck) +- [Standard Library](#standard-library) +- [Variants](#variants) +- [Optimisations](#optimisations) + +## Brainfuck + +Brainfuck is an esoteric programming language, originally designed as a theoretical example of a Turing complete language with an extremely minimal compiler. The name is due to its difficulty: it is significantly more difficult to create complex programs than in any popular modern language. + +### Specification + +When a Brainfuck program is run, it operates on an array/tape of cells, performing operations on the tape. Each cell contains an integer, initialised to 0 by default. The program operates on one cell at a time based on the position of a "tape head". 
Brainfuck supports the following operations: + +- `+`: increment the value of the current cell +- `-`: decrement the value of the current cell +- `>`: move the tape head one cell to the right +- `<`: move the tape head one cell to the left +- `.`: output the current cell as a byte to stdout +- `,`: input a byte from stdin, overwriting the current cell +- `[`: jump to the corresponding `]` if the current cell is 0 +- `]`: jump to the corresponding `[` if the current cell is not 0 + +A Brainfuck program consists of a list of these commands, which are executed sequentially. The program terminates once execution moves past the final operation in the list. + +### Interpreter Implementation Details + +The Mastermind IDE and compiler library contain an implementation of a Brainfuck interpreter. This implementation is intended to match the behaviour of the most popular Brainfuck implementations: + +#### 8-bit Wrapping Cells + +In this implementation, each cell is an 8-bit integer that wraps if an increment or decrement operation overflows or underflows. + +E.g. given the current tape cell value is `255`, after an increment (`+`), the cell value is now `0`. + +Similarly: `0`, after a decrement (`-`) becomes `255` + +#### Infinite Bidirectional Tape + +In this implementation, the tape extends infinitely in both directions. + +## Variables + +### Cells + +The base data type in Mastermind is the `cell`, this corresponds to a single 8-bit cell on the Brainfuck tape. + +``` +cell var = 56; +cell c = 'g'; +cell bool = true; // true/false equivalent to 1/0 +``` + +Cells default to `0`. + +### Input/Output + +The `input` and `output` keywords in Mastermind correspond to the `,` and `.` operators in Brainfuck. `input` simply inputs the next byte from stdin, and `output` outputs a byte to stdout. + +``` +// stdin: 00abc +cell g; +drain 5 { + // do this 5 times + input g; + g += 1; + output g; +} +// stdout: 11bcd +``` + +The simplest way to display text is to output valid ASCII characters. 
If your Brainfuck implementation supports unicode, that is also possible by outputting multiple bytes. + +``` +output 240; +output 159; +output 164; +output 145; +output 10; +// displays 🤑 (emoji with green cash for tongue) +``` + +### Cell Arrays + +Variables can also be defined as contiguous arrays of cells. + +``` +// multi-cell: +cell[4] array_example = [1, 2, 3, 4]; +cell[5] string_example = "hello"; +cell[2] foo; +foo[0] = 204; +``` + +### Structs + +Structure types can be defined with named fields, then instantiated as variables. + +``` +struct struct_name { + cell x; + cell y; + cell[5] zzzzz; +} + +struct struct_name s; +s.x = 4; +s.y = 123; +s.zzzzz[0] += 3; +s.zzzzz[4] = 180; + +// nested struct: +struct Nested { + struct struct_name n; +} +``` + +### Structs and Arrays + +Any type can be repeated into an array/contiguous allocation. This includes cells, structs, arrays of cells, and arrays of structs. + +``` +cell[4][6] mult_arr; // a 6-length array of cell[4] arrays +cell[4][6][2] mult_arr; // 2 * (6-length arrays of cell[4] arrays) + +struct T { + cell a; + cell[4][2] b; +} + +struct T[10] ten_T_structs; +ten_T_structs[4].b[1][3] = 45; + +struct S { + struct T[2][4] matrix_of_T_structs; + cell other; +} + +struct S[3] three_S_structs; +three_S_structs[1].matrix_of_T_structs[3][0] = '5'; +``` + +#### Note: Array indices must be compile-time constant integers + +This is a limitation of Brainfuck, getting around this problem requires more runtime code than is reasonable to include by default, due to the goals of Mastermind. You can implement equivalent behaviour using in-line Brainfuck, structs, and functions. 
+ +### Location specifiers + +The exact memory cells occupied by a variable can be specified: + +``` +// value 1 at tape position 4 +cell a @4 = 1; +// contiguous array of 1s, starting at cell -1 +cell[3] b @-1 = [1, 1, 1]; +``` + +#### Struct subfields + +The byte-order and positioning of a struct's subfields can be specified: + +``` +struct T { + cell a @1; + cell[2] b @3; +} +// struct T's layout: +// (-, a, -, b[0], b[1]) +// '-' denotes an untracked padding cell +``` + +## Conditionals + +Mastermind supports basic `if`/`else` statements. An `if` statement accepts an expression that evaluates to a `cell` type, if the expression is evaluated to be truthy (i.e. not equal to `0`), then the `if` block is executed, otherwise the optional `else` block is executed. This behaviour can be inverted using the `not` keyword. + +``` +if 13 { + output "13"; +} + +if not true { + // unreachable +} + +cell var = 4; +if var { + output "true"; +} else { + output "false"; +} + +// typical equivalence use-case: +if not var - 10 { + // == +} else { + // != +} +``` + +## Loops + +Mastermind currently supports three forms of loops: `while`, `drain` and `copy`. + +It should be noted that there is no early breaking in any of these forms, so all clauses in a loop body are always executed in each iteration. + +### While + +The `while` loop operates similarly to other languages, accepting a condition expression, and a loop body. + +The clauses inside the loop body are executed until the condition is falsy (i.e. equal to `0`). The condition is checked before each iteration. + +Note: currently `while` conditions must be direct variable references, this is subject to future compiler updates. + +``` +cell n = 5; +while n { + // do stuff + n -= 1; +} +// n is now equal to 0 +``` + +### Drain + +The `drain` loop mirrors a very common pattern found in Brainfuck programs: decrementing a cell. `drain` accepts an expression, a list of variables to 'drain into', and/or a loop body. 
+ +If the expression is a direct variable reference, then the variable is decremented after each iteration. If not, it is evaluated in a temporary cell, then decremented after each iteration. + +``` +drain var { + // do stuff +} + +// equivalent to: +while var { + // do stuff + var -= 1; +} +``` + +With expressions: + +``` +drain 6 { + output 'a'; +} +// aaaaaa +``` + +The following example leaves `x` unchanged: + +``` +cell x = 7; +drain x - 2 { + output 'b'; +} +// bbbbb +``` + +#### Into + +If the `into` keyword is used, followed by a whitespace-separated list of target variables, the targets will be incremented after each iteration. + +``` +cell i; +drain 10 into i { + output '0' + i; +} +// 0123456789 + +// equivalent to: +cell i; +cell ten = 10; +while ten { + output '0' + i; + + i += 1; + ten -= 1; +} +``` + +Another example: + +``` +drain var into other_var other_var_2 *spread_array; + +// equivalent to: +drain var { + other_var += 1; + other_var_2 += 1; + spread_array[0] += 1; + spread_array[1] += 1; + spread_array[2] += 1; + // ... +} +``` + +### Copy + +The `copy` loop acts similarly to the `drain` loop, however the expression must be a direct variable reference, and it is left unchanged afterwards, and its original value is accessible within the loop body. + +``` +cell y; +copy x into y { + // loop body +}; +``` + +An equivalence example: + +``` +cell var = 5; +copy var { + output '0' + var; +} +// 55555 + +// equivalent to: +cell var = 5; +cell temp = var; +while temp { + output '0' + var; + + temp -= 1; +} +``` + +## Functions + +Mastermind supports a minimal function system: functions can be defined with a name and a fixed number of typed arguments. + +``` +fn newline() { output '\n'; } + +fn print_zeros(cell num) { + copy num { + output '0'; + } + newline(); +} + +// expressions as arguments are currently not supported, +// i.e. 
print_zeros(9) +cell g = 9; +print_zeros(g); +``` + +Unlike most modern programming languages, functions are not considered first-class values. Functions in Mastermind are in-lined at compile-time, and all arguments are passed by reference. Values can be returned by editing passed in arguments, or editing variables in an outer scope, although the latter makes a function less portable. + +``` +fn is_zero(cell in, cell out) { + out = true; + if in { + out = false; + } +} + +cell value = 'h'; +cell falsy; +is_zero(value, falsy); +``` + +Example showing a function reading a variable from an outer scope: + +``` +fn print_global_g(cell count) { + copy count { + output chr; + } +} + +cell chr = 'g'; +cell count = 3; +print_global_g(count); +// ggg + +{ + // inner scope with a new 'chr' allocation + cell chr = 'G'; + count = 5; + print_global_g(count); + // GGGGG +} + +// same call again, now the inner chr has been freed (count is still 5) +print_global_g(count); +// ggggg +``` + +### Types and Overloads + +Functions support overloads with different types or number of arguments. Examples of supported behaviour: + +``` +fn func1() { + output '1'; +} +fn func1(cell a) { + output '2'; +} +fn func1(cell a, cell b) { + output '3'; +} +struct X { cell a; } +fn func1(struct X x) { + output '4'; +} +struct Y { cell a; } +fn func1(struct Y y) { + output '5'; +} +fn func1(cell a, struct X x, struct Y y) { + output '6'; +} +cell n; +struct X x; +struct Y y; +func1(); +func1(n); +func1(n, n); +func1(x); +func1(y); +func1(n, x, y); +// 123456 +``` + +## In-Line Brainfuck + +In-line Brainfuck allows the programmer to define custom behaviour as if writing raw Brainfuck, inspired by in-line assembly in C. 
+ +Basic example: + +``` +// find the next cell that equals -1 +bf { + +[->+]- +} +``` + +More advanced example: + +``` +// input a line of lowercase letters and output the uppercase version +// this is an intentionally inefficient example +bf @3 clobbers var *spread_var etc { + ,----------[++++++++++>,----------] + <[<]> + [ + { + cell g @0; + assert g unknown; + output g + ('A' - 'a'); + // embedded Mastermind! + } + > + ] + // now clear and return + <[[-]<]> +} +``` + +It is the programmer's responsibility to clear used cells and return back to the cell in which they started the in-line Brainfuck context. If the programmer does not do this, any following Mastermind code may break. + +### Memory location specifiers + +The exact location to start an in-line Brainfuck context can be specified: + +``` +cell var @3 = 4; +// compiled: >>>++++ + +bf @4 { + <><><> +} +// compiled: >>>><><><> +``` + +Variables can also be used: + +``` +cell d; +bf @d { + // brainfuck code here +} + +struct G { + cell h; + cell i; + cell j; +} +struct G g; + +bf @g { + // starts on the first cell of g's allocation +} +// equivalent to: +bf @g.h {} +``` + +### Clobbering and Assertions + +With optimisations enabled, Mastermind will try to predict the value of cells at compile-time, so it can prevent unnecessary cell clean-ups and unreachable code. If your in-line Brainfuck affects existing Mastermind variables, you should tell the compiler using the `clobbers` keyword, the syntax is similar to the `drain into` target list: + +``` +bf clobbers var *spread_var other_var etc {} +``` + +The compiler will now assume nothing about the values of those variables afterwards. + +If instead you want to tell the compiler specifically that a variable has become a certain value, you can use `assert`: + +``` +assert var equals 3; +// most common use cases: +assert var equals 0; +assert var unknown; +``` + +Asserting a variable as `unknown` is equivalent to clobbering. 
+ +### Embedded Mastermind + +You can embed high-level Mastermind code within a Brainfuck context. During compilation the embedded Mastermind is compiled and the generated Brainfuck is inserted in place. + +``` +// input 3 n-length lines of input +bf { + >+++<,[ + { + cell input_char @0; + assert input_char unknown; + cell length_remaining @1; + assert length_remaining unknown; + + cell next_char @2; + cell next_length_remaining @3; + if not input_char - '\n' { + length_remaining -= 1; + } + if length_remaining { + drain length_remaining into next_length_remaining; + input next_char; + } + } + >>] +} +``` + +Embedded Mastermind can include in-line Brainfuck, this is recursive. For example: + +``` +// top-level Mastermind context +bf { + ++>> + { + // inner Mastermind context + bf { + ++>> + { + // inner inner Mastermind context + bf { + ++>> + { + //... + } + <<-- + } + } + <<-- + } + } + <<-- +} +``` + +The compiler cannot guarantee the global head position at compile time within an in-line Brainfuck context. Therefore memory location specifiers are relative to the current embedded Mastermind context, not the entire program. + +Also, top-level variables are not cleared by default in Mastermind contexts, this allows you to "leave" variables in cells for your Brainfuck to use. If you want variables in your embedded Mastermind to be automatically cleared, you can open a scope at the top level: + +``` +bf { + ++----++[][][<><><>] // the program doesn't matter for this example + { + // variables here will not be cleared + cell g @2; + assert g unknown; + { + // variables here will be cleared + cell b = 32; + } + } + {{ + // self-cleaning Mastermind code here + }} +} +``` + +## Standard Library + +Currently the Mastermind standard library is very limited, and is effectively a set of example programs included in the web IDE and source repository. + +### Including files + +You can include/import other files using preprocessor directives. 
The Mastermind preprocessor is intended to mirror the C preprocessor, however it currently only supports the `#include` directive. + +The following is a basic example: + +``` +// file1.mmi +struct H { + cell a; +} +fn print(struct H h) { + output h.a; +} +``` + +``` +// main file being compiled +#include "file1.mmi" + +struct H h; +h.a = 64; +print(h); +// @ +``` + +### Standard Library Examples + +The most mature files in the included examples are the following: + +- `bitops`: bitshifting operations for cell types +- `i8`: signed type for 8-bit integers and supporting functions +- `u8`: common supporting functions for cell types +- `u16`: a 16-bit unsigned integer type and supporting functions +- `ifp16`: a signed 16-bit fixed-point number type and supporting functions +- `stack`: includes a cell stack and associated functions that can hold up to 32 non-zero elements + +NOTE: due to the current lack of header-guard support, importing more than one of these will likely cause a compiler error. Until this is implemented, the best workaround is to include only `ifp16`, as that includes the others. + +Example usage: + +``` +#include <u16> + +// read a 16 bit number from stdin, add 55, then print + +struct u16 n; +read(n); + +cell ff = 55; +add(n, ff); +print(n); +output ' '; +debug(n); // print the binary representation +// example input: 16000 +// output: 16055 0011111010110111 +``` + +Example fixed-point usage: + +``` +#include <ifp16> + +struct ifp16 n; +_99p99609375(n); // constant 99.99609375 +struct ifp16 m; +__1p5(m); // constant -1.5 + +divide(n, m); +print(n); +output ' '; +debug(n); +// -66.66 10111101.01010110 +``` + +## Variants + +The Mastermind compiler can be extended to support Brainfuck variants. + +### Supported Variants: + +#### Classic (1D) Brainfuck + +This is the default behaviour, typical Brainfuck implementation as described in [Brainfuck](#brainfuck). 
+ +#### 2D Brainfuck + +Mastermind currently supports two-dimensional Brainfuck, this is a Brainfuck variant with an additional dimension in the memory array. + +2D Brainfuck support can be enabled in the compiler settings in the web IDE, adding the following features: + +- New opcodes for in-line Brainfuck contexts and in generated Brainfuck code: + - `^`: move up one cell in the grid + - `v`: move down one cell in the grid +- The ability to specify 2D coordinates for location specifiers: + ``` + cell var @(5, -7) = 'a'; + bf @var {[-<<<<<^^^^^^^+>>>>>vvvvvvv]} + bf @(0, 0) {.....} + // aaaaa + ``` +- Three new memory allocation strategies for generated 2D code: + - Zig Zag + - Spiral + - Tiles + + +## Optimisations + +The Mastermind compiler includes optional optimisations for generated code. The original goal of Mastermind was to generate very minimal Brainfuck for use in Code Golf competitions, so most of these are aimed at reducing generated code length. + + + +### Cell Clearing + + + +Optimises clearing cells after they are de-allocated, it does this by tracking their values at compile-time and acting based on a cell's known value. For instance, if a cell can be proven at compile-time to have the value `2`, it is more efficient to clear with `--`, than the typical Brainfuck clear: `[-]`. + +### Constants + + + +When large values are added in Brainfuck, the naive approach is to use the increment `+` operator for as many times as needed. The constants optimiser will use multiplication to shorten the code needed to add/subtract large values. Example: the value `46` can be achieved by either `++++++++++++++++++++++++++++++++++++++++++++++` or the shorter: `+++++[>+++++++++<-]>+` (5 \* 9 + 1). + +### Generated Code + + + +Optimises generated Brainfuck code by shortening trivial program segments. + +Currently this is limited to optimising segments of Brainfuck programs with the following operations: `+`, `-`, `>`, `<`, `[-]`. 
+ +``` +--->>><<<++ +// becomes: +- +``` + +An end-to-end example: + +``` +cell h = 4; +cell j = 3; + +h += 10; + +drain 10 { + j = 5; + h += 4; + j += 1; +} + +// compiles to: +++++>+++<++++++++++>>++++++++++[<+<++++>[-]+++++>-] +// after optimisation: +++++++++++++++>+++>++++++++++[-<[-]+++++<++++>>] +``` + +This system finds optimal equivalent segments for classic Brainfuck programs, however for the 2D Brainfuck variant it is not guaranteed, as finding the optimal path between memory cells in a 2D grid is more difficult. The _Generated Code Permutations_ setting enables an exhaustive search for the optimal path when using the 2D Brainfuck variant, otherwise a greedy approach is used. + +### Empty Blocks + + + +Detects if a code block is empty or has no effect on the program, and prunes the associated clause. + +### Unreachable Loops + + + +Brainfuck loops will be omitted if the cell they start on can be proven to be `0` at compile-time. diff --git a/runningMastermind.md b/runningMastermind.md deleted file mode 100644 index 0c24a89..0000000 --- a/runningMastermind.md +++ /dev/null @@ -1,36 +0,0 @@ -# Running Mastermind - -### 1. Install Rust -Install rust through the following website - https://www.rust-lang.org/tools/install - -This will also install Cargo which is needed to build the project - -### 2. Install Yarn -If not currently installed please install Yarn if unsure follow this guide - https://classic.yarnpkg.com/lang/en/docs/install - -### 3. Install wasm-pack -Install using Yarn, Cargo or the following guide https://rustwasm.github.io/wasm-pack/installer/ - -### 4. Run Yarn Install -Install the Javascript dendencies by running -```bash - yarn install -``` - -### 5. Build the grammar -Build the grammar using the following yarn command -```bash - yarn build:grammar -``` - -### 6. Build Web Assembly Pack -Build Web Assembly Pack using the following yarn command -```bash - yarn build:wasm -``` - -### 7. 
Run Dev Mode -Run Dev mode using the following command -```bash - yarn dev -``` \ No newline at end of file diff --git a/src/App.css b/src/App.css index c02d1ad..5a0dfbc 100644 --- a/src/App.css +++ b/src/App.css @@ -22,16 +22,18 @@ } .sidebar { - flex: 1; - display: flex; flex-direction: column; align-items: center; justify-content: center; overflow: hidden; + padding-top: 0.5em; + padding-bottom: 0.5em; + padding-left: 0.25em; + padding-right: 0.25em; + gap: 1em; } - .code-panel { position: relative; background-color: var(--BG-2); diff --git a/src/App.tsx b/src/App.tsx index 533790e..431c9e5 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -31,6 +31,7 @@ import std_i8 from "../programs/std/i8?raw"; import std_u8 from "../programs/std/u8?raw"; import std_u16 from "../programs/std/u16?raw"; import std_ifp16 from "../programs/std/ifp16?raw"; +import std_stack from "../programs/std/stack?raw"; import "./App.css"; import Divider from "./components/Divider"; @@ -52,7 +53,7 @@ import { const AppContext = createContext(); // update this when you want the user to see new syntax -const MIGRATION_VERSION = 11; +const MIGRATION_VERSION = 12; const App: Component = () => { const [version, setVersion] = makePersisted(createSignal(), { @@ -191,6 +192,11 @@ const App: Component = () => { label: "ifp16", rawText: std_ifp16, }, + { + id: uuidv4(), + label: "stack", + rawText: std_stack, + }, { id: uuidv4(), label: "Fixed Point Calculator", @@ -567,9 +573,7 @@ const App: Component = () => { -
- -
+ ); diff --git a/src/components/Docs.tsx b/src/components/Docs.tsx deleted file mode 100644 index e4f2f52..0000000 --- a/src/components/Docs.tsx +++ /dev/null @@ -1,104 +0,0 @@ -import { Portal } from "solid-js/web"; -import { SolidMarkdown } from "solid-markdown"; -import remarkGfm from "remark-gfm"; -import { IoClose } from "solid-icons/io"; -import { Component, createEffect, createSignal, JSX, Show } from "solid-js"; -import { useAppContext } from "../App"; - -import intro from "../../docs/intro.md?raw"; -import brainfuck from "../../docs/brainfuck.md?raw"; -import variables from "../../docs/variables.md?raw"; -import conditionals from "../../docs/conditionals.md?raw"; -import loops from "../../docs/loops.md?raw"; -import functions from "../../docs/functions.md?raw"; -import inlinebrainfuck from "../../docs/inlinebrainfuck.md?raw"; -import standardlib from "../../docs/standardlib.md?raw"; -import twodimensional from "../../docs/twodimensional.md?raw"; -import optimisations from "../../docs/optimisations.md?raw"; - -import { FaSolidArrowLeftLong, FaSolidArrowRightLong } from "solid-icons/fa"; -const DocsModal: Component<{ style?: JSX.CSSProperties }> = () => { - const app = useAppContext()!; - const docs = { - Introduction: intro, - Brainfuck: brainfuck, - Variables: variables, - Conditionals: conditionals, - Loops: loops, - Functions: functions, - "Inline Brainfuck": inlinebrainfuck, - "Standard Library": standardlib, - "2D Mastermind": twodimensional, - Optimisations: optimisations, - }; - const titles = Object.keys(docs); - const [selected, setSelected] = createSignal(titles[0]); - const [docsContent, setDocsContent] = createSignal( - docs[selected() as keyof typeof docs] ?? "" - ); - createEffect(() => { - setDocsContent(docs[selected() as keyof typeof docs] ?? ""); - }); - - function nextDoc() { - setSelected(titles[(titles.indexOf(selected() ?? "") + 1) % titles.length]); - } - function prevDoc() { - setSelected( - titles[ - (titles.indexOf(selected() ?? 
"") - 1 + titles.length) % titles.length - ] - ); - } - - return ( - - {/* The weirdest solid js feature, puts the component into the top level html body */} - -
app.setDocsOpen(false)} - > -
e.stopPropagation()}> - - - -
- - {docsContent()} - -
- app.setDocsOpen(false)} - /> -
-
-
-
- ); -}; - -export default DocsModal; diff --git a/src/components/Settings.tsx b/src/components/Settings.tsx index ae82d59..54142a5 100644 --- a/src/components/Settings.tsx +++ b/src/components/Settings.tsx @@ -5,25 +5,24 @@ import { useAppContext } from "../App"; // TODO: FIX THIS SO WE DON'T HAVE 2 PERSISTED VALUES ONLY ONE const SettingsModal: Component<{ style?: JSX.CSSProperties }> = () => { + // TODO: refactor this const MemoryAllocationOptions: string[] = [ - "1D Mastermind", - "2D Mastermind - Zig Zag", - "2D Mastermind - Spiral", - "2D Mastermind - Tiles", - //NOT IMPLEMENTED - // "2D Mastermind - Nearest", + "Classic", + "2D Zig Zag", + "2D Spiral", + "2D Tiles", ]; - const tickboxKeys: (keyof OptimisationSettings)[] = [ - "optimise_cell_clearing", - "optimise_constants", - "optimise_empty_blocks", - "optimise_generated_code", - "optimise_generated_all_permutations", - "optimise_memory_allocation", - "optimise_unreachable_loops", - "optimise_variable_usage", - ]; + const tickboxKeys: (keyof OptimisationSettings)[] = [ + "optimise_cell_clearing", + "optimise_constants", + "optimise_empty_blocks", + "optimise_generated_code", + "optimise_generated_all_permutations", + "optimise_unreachable_loops", + // "optimise_memory_allocation", + // "optimise_variable_usage", + ]; const app = useAppContext()!; return ( @@ -34,110 +33,113 @@ const SettingsModal: Component<{ style?: JSX.CSSProperties }> = () => { class="readme-modal-container" onClick={() => app.setSettingsOpen(false)} > -
e.stopPropagation()}> -

SETTINGS

-
- Optimisations: - - app.setConfig((prev) => { - const b = tickboxKeys.some((key) => !prev[key]); - return { - ...prev, - ...Object.fromEntries( - tickboxKeys.map((key) => [key, b]) - ) - } as MastermindConfig; - }) - } - > - (toggle all) - - -
{ - const target = e.target as HTMLInputElement; - app.setConfig((prev) => ({ - ...prev, - [target.name]: !!target.checked, - })); - }} +
e.stopPropagation()}> +

SETTINGS

+
+ + Optimisations: + + app.setConfig((prev) => { + const b = tickboxKeys.some((key) => !prev[key]); + return { + ...prev, + ...Object.fromEntries( + tickboxKeys.map((key) => [key, b]) + ), + } as MastermindConfig; + }) + } > - tickboxKeys.includes(key as keyof OptimisationSettings) - )}> - {([key, enabled]: [string, boolean]) => ( - - )} - - - -
- 2D GENERATION: + (toggle all) +
-
+ { + const target = e.target as HTMLInputElement; + app.setConfig((prev) => ({ + ...prev, + [target.name]: !!target.checked, + })); + }} + > + + tickboxKeys.includes(key as keyof OptimisationSettings) + )} + > + {([key, enabled]: [string, boolean]) => ( - - - -
- + + +
+ 2D GENERATION: +
+
+ + + +
+ app.setSettingsOpen(false)} + /> +
@@ -152,9 +154,9 @@ interface OptimisationSettings { optimise_empty_blocks: boolean; optimise_generated_code: boolean; optimise_generated_all_permutations: boolean; - optimise_memory_allocation: boolean; optimise_unreachable_loops: boolean; - optimise_variable_usage: boolean; + // optimise_memory_allocation: boolean; + // optimise_variable_usage: boolean; } interface TwoDimensionalSettings { @@ -170,22 +172,22 @@ const optimisationLabels: Record = { optimise_cell_clearing: "cell clearing", optimise_constants: "constants", optimise_empty_blocks: "empty blocks", - optimise_generated_code: "generated code", - optimise_generated_all_permutations: "generated code permutations (May slow larger projects)", - optimise_memory_allocation: "memory allocations", optimise_unreachable_loops: "unreachable loops", - optimise_variable_usage: "variable usage", + optimise_generated_code: "generated code", + optimise_generated_all_permutations: "generated code permutations", + // optimise_memory_allocation: "memory allocations", + // optimise_variable_usage: "variable usage", }; export const DEFAULT_MASTERMIND_CONFIG = { - optimise_cell_clearing: false, - optimise_constants: false, - optimise_empty_blocks: false, - optimise_generated_code: false, - optimise_generated_all_permutations: false, - optimise_memory_allocation: false, - optimise_unreachable_loops: false, - optimise_variable_usage: false, - memory_allocation_method: 0, - enable_2d_grid: false, - }; + optimise_cell_clearing: false, + optimise_constants: false, + optimise_empty_blocks: false, + optimise_generated_code: false, + optimise_generated_all_permutations: false, + optimise_unreachable_loops: false, + // optimise_memory_allocation: false, + // optimise_variable_usage: false, + memory_allocation_method: 0, + enable_2d_grid: false, +}; diff --git a/src/panels/CompilerPanel.tsx b/src/panels/CompilerPanel.tsx index 9c4341d..932efdd 100644 --- a/src/panels/CompilerPanel.tsx +++ b/src/panels/CompilerPanel.tsx @@ -34,10 +34,6 
@@ const CompilerPanel: Component<{ style?: JSX.CSSProperties }> = (props) => { await app.compile(entryFileId, app.config()); }; - createEffect(() => { - console.log(app.fileStates); - }); - return (
@@ -48,7 +44,7 @@ const CompilerPanel: Component<{ style?: JSX.CSSProperties }> = (props) => {