diff --git a/.gitignore b/.gitignore index 44d1841..b7d43f7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,11 @@ coverage/ tarpaulin-report.html cobertura.xml +# Fuzzing +fuzz/target/ +fuzz/corpus/ +fuzz/artifacts/ + # Rust target/ Cargo.lock diff --git a/Cargo.toml b/Cargo.toml index 1fa1015..108b955 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,46 +25,45 @@ path = "src/cli/main.rs" required-features = ["cli"] [features] -default = ["cli"] -cli = [ - "dep:clap", - "dep:anyhow", - "dep:tiktoken-rs", - "dep:comfy-table", - "dep:ratatui", - "dep:crossterm", - "dep:tui-textarea", - "dep:arboard", - "dep:syntect", - "dep:unicode-width", - "dep:chrono", -] +default = ["cli", "cli-stats", "tui", "tui-clipboard", "tui-time", "parallel"] +cli = ["dep:clap", "dep:anyhow"] +cli-stats = ["cli", "dep:tiktoken-rs", "dep:comfy-table"] +tui = ["dep:anyhow", "dep:ratatui", "dep:crossterm", "dep:tui-textarea"] +tui-clipboard = ["tui", "dep:arboard"] +tui-time = ["tui", "dep:chrono"] +parallel = ["dep:rayon"] [dependencies] serde = { version = "1.0.228", features = ["derive"] } indexmap = "2.0" serde_json = { version = "1.0.145", features = ["preserve_order"] } thiserror = "2.0.17" +itoa = "1.0" +ryu = "1.0" +rayon = { version = "1.10", optional = true } -# CLI dependencies (gated behind "cli" feature) +# CLI dependencies (gated behind "cli"/"cli-stats" features) clap = { version = "4.5.11", features = ["derive"], optional = true } anyhow = { version = "1.0.86", optional = true } tiktoken-rs = { version = "0.9.1", optional = true } comfy-table = { version = "7.1", optional = true } -# TUI dependencies (gated behind "cli" feature) +# TUI dependencies (gated behind "tui" feature) ratatui = { version = "0.29", optional = true } crossterm = { version = "0.28", optional = true } tui-textarea = { version = "0.7", optional = true } arboard = { version = "3.4", optional = true } -syntect = { version = "5.2", optional = true } -unicode-width = { version = "0.2", optional = true } chrono = { version = "0.4", optional = true } [dev-dependencies] datatest-stable = "0.3.3" glob = "0.3" +criterion = "0.5" [[test]] name = "spec_fixtures" harness = false + +[[bench]] +name = "encode_decode" +harness = false diff --git a/README.md b/README.md index b42d6d7..68fc725 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,25 @@ users[2]{id,name}: - **Strict Validation**: Enforces all spec rules (configurable) - **Well-Tested**: Comprehensive test suite with unit tests, spec fixtures, and real-world scenarios +## Performance Snapshot (Criterion) + +Snapshot from commit `f5b1b7e` using: +`cargo bench --bench encode_decode -- --save-baseline current --noplot` + +| Benchmark | Median | +| --- | --- | +| `tabular/encode/128` | 145.81 us | +| `tabular/decode/128` | 115.51 us | +| `tabular/encode/1024` | 1.2059 ms | +| `tabular/decode/1024` | 949.65 us | +| `deep_object/encode/32` | 11.766 us | +| `deep_object/decode/32` | 10.930 us | +| `deep_object/encode/128` | 46.867 us | +| `deep_object/decode/128` | 49.468 us | +| `decode_long_unquoted` | 10.554 us | + +Numbers vary by machine; use Criterion baselines to compare before/after changes. + ## Installation ### As a Library @@ -53,6 +72,22 @@ cargo add toon-format cargo install toon-format ``` +### Feature Flags + +By default, all CLI/TUI features are enabled. You can opt in to only what you need: + +```toml +toon-format = { version = "0.4", default-features = false } +``` + +```bash +cargo install toon-format --no-default-features --features cli +cargo install toon-format --no-default-features --features cli,cli-stats +cargo install toon-format --no-default-features --features cli,tui,tui-clipboard,tui-time +``` + +Feature summary: `cli`, `cli-stats`, `tui`, `tui-clipboard`, `tui-time`, `parallel`. + --- ## Library Usage @@ -126,6 +161,38 @@ fn main() -> Result<(), toon_format::ToonError> { Ok(()) } ``` + +### Serde-Style API + +Prefer serde_json-like helpers? Use `to_string`/`from_str` and friends: + +```rust +use serde::{Deserialize, Serialize}; +use toon_format::{from_reader, from_str, to_string, to_writer}; + +#[derive(Debug, Serialize, Deserialize, PartialEq)] +struct User { + name: String, + age: u32, +} + +let user = User { + name: "Ada".to_string(), + age: 37, +}; + +let toon = to_string(&user)?; +let round_trip: User = from_str(&toon)?; + +let mut buffer = Vec::new(); +to_writer(&mut buffer, &user)?; +let round_trip: User = from_reader(buffer.as_slice())?; +# Ok::<(), toon_format::ToonError>(()) +``` + +Option-aware variants: `to_string_with_options`, `to_writer_with_options`, +`from_str_with_options`, `from_slice_with_options`, `from_reader_with_options`. + --- ## API Reference @@ -577,6 +644,12 @@ cargo fmt # Build docs cargo doc --open + +# Fuzz targets (requires nightly + cargo-fuzz) +cargo install cargo-fuzz +cargo +nightly fuzz build +cargo +nightly fuzz run fuzz_decode -- -max_total_time=10 +cargo +nightly fuzz run fuzz_encode -- -max_total_time=10 ``` --- diff --git a/benches/encode_decode.rs b/benches/encode_decode.rs new file mode 100644 index 0000000..ee97f99 --- /dev/null +++ b/benches/encode_decode.rs @@ -0,0 +1,92 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode_default}; + +fn make_tabular(rows: usize) -> Value { + let mut items = Vec::with_capacity(rows); + for i in 0..rows { + items.push(json!({ + "id": i, + "name": format!("User_{i}"), + "score": i * 2, + "active": i % 2 == 0, + "tag": format!("tag{i}"), + })); + } + Value::Array(items) +} + +fn make_deep_object(depth: usize) -> Value { + let mut value = json!({ + "leaf": "value", + "count": 1, + }); + + for i in 0..depth { + value = json!({ + format!("level_{i}"): value, + }); + } + + value +} + +fn make_long_unquoted(words: usize) -> String { + let mut parts = Vec::with_capacity(words); + for i in 0..words { + parts.push(format!("word{i}")); + } + parts.join(" ") +} + +fn bench_tabular(c: &mut Criterion) { + let mut group = c.benchmark_group("tabular"); + for rows in [128_usize, 1024] { + let value = make_tabular(rows); + let toon = encode_default(&value).expect("encode tabular"); + + group.bench_with_input(BenchmarkId::new("encode", rows), &value, |b, val| { + b.iter(|| encode_default(black_box(val)).expect("encode tabular")); + }); + + group.bench_with_input(BenchmarkId::new("decode", rows), &toon, |b, input| { + b.iter(|| decode_default::(black_box(input)).expect("decode tabular")); + }); + } + group.finish(); +} + +fn bench_deep_object(c: &mut Criterion) { + let mut group = c.benchmark_group("deep_object"); + for depth in [32_usize, 128] { + let value = make_deep_object(depth); + let toon = encode_default(&value).expect("encode deep object"); + + group.bench_with_input(BenchmarkId::new("encode", depth), &value, |b, val| { + b.iter(|| encode_default(black_box(val)).expect("encode deep object")); + }); + + group.bench_with_input(BenchmarkId::new("decode", depth), &toon, |b, input| { + b.iter(|| decode_default::(black_box(input)).expect("decode deep object")); + }); + } + group.finish(); +} + +fn bench_long_unquoted(c: &mut Criterion) { + let words = 512; + let long_value = make_long_unquoted(words); + let toon = format!("value: {long_value}"); + + c.bench_function("decode_long_unquoted", |b| { + b.iter(|| decode_default::(black_box(&toon)).expect("decode long unquoted")); + }); +} + +criterion_group!( + benches, + bench_tabular, + bench_deep_object, + bench_long_unquoted +); +criterion_main!(benches); diff --git a/examples/parts/arrays.rs b/examples/parts/arrays.rs index 7f68638..260a301 100644 --- a/examples/parts/arrays.rs +++ b/examples/parts/arrays.rs @@ -1,7 +1,4 @@ -use serde::{ - Deserialize, - Serialize, -}; +use serde::{Deserialize, Serialize}; use serde_json::json; use toon_format::encode_default; diff --git a/examples/parts/arrays_of_arrays.rs b/examples/parts/arrays_of_arrays.rs index 5eb6ae7..bd6c319 100644 --- a/examples/parts/arrays_of_arrays.rs +++ b/examples/parts/arrays_of_arrays.rs @@ -1,7 +1,4 @@ -use serde::{ - Deserialize, - Serialize, -}; +use serde::{Deserialize, Serialize}; use serde_json::json; use toon_format::encode_default; diff --git a/examples/parts/decode_strict.rs b/examples/parts/decode_strict.rs index 14303bd..5af6bf9 100644 --- a/examples/parts/decode_strict.rs +++ b/examples/parts/decode_strict.rs @@ -1,8 +1,5 @@ use serde_json::Value; -use toon_format::{ - decode, - DecodeOptions, -}; +use toon_format::{decode, DecodeOptions}; pub fn decode_strict() { // Malformed: header says 2 rows, but only 1 provided diff --git a/examples/parts/delimiters.rs b/examples/parts/delimiters.rs index b44eeaa..338e7ee 100644 --- a/examples/parts/delimiters.rs +++ b/examples/parts/delimiters.rs @@ -1,9 +1,5 @@ use serde_json::json; -use toon_format::{ - encode, - Delimiter, - EncodeOptions, -}; +use toon_format::{encode, Delimiter, EncodeOptions}; pub fn delimiters() { let data = json!({ diff --git a/examples/parts/mixed_arrays.rs b/examples/parts/mixed_arrays.rs index c3e120b..3cf4afd 100644 --- a/examples/parts/mixed_arrays.rs +++ b/examples/parts/mixed_arrays.rs @@ -1,7 +1,4 @@ -use serde::{ - Deserialize, - Serialize, -}; +use serde::{Deserialize, Serialize}; use serde_json::json; use toon_format::encode_default; diff --git a/examples/parts/objects.rs b/examples/parts/objects.rs index d7b1bca..da6769f 100644 --- a/examples/parts/objects.rs +++ b/examples/parts/objects.rs @@ -1,7 +1,4 @@ -use serde::{ - Deserialize, - Serialize, -}; +use serde::{Deserialize, Serialize}; use serde_json::json; use toon_format::encode_default; diff --git a/examples/parts/round_trip.rs b/examples/parts/round_trip.rs index 5c5c3bd..b5873a8 100644 --- a/examples/parts/round_trip.rs +++ b/examples/parts/round_trip.rs @@ -1,15 +1,6 @@ -use serde::{ - Deserialize, - Serialize, -}; -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode_default, - encode_default, -}; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode_default}; #[derive(Debug, Serialize, Deserialize, PartialEq)] struct Product { diff --git a/examples/parts/structs.rs b/examples/parts/structs.rs index 7b49e48..14ec066 100644 --- a/examples/parts/structs.rs +++ b/examples/parts/structs.rs @@ -1,11 +1,5 @@ -use serde::{ - Deserialize, - Serialize, -}; -use toon_format::{ - decode_default, - encode_default, -}; +use serde::{Deserialize, Serialize}; +use toon_format::{decode_default, encode_default}; #[derive(Debug, Serialize, Deserialize, PartialEq)] struct User { diff --git a/examples/parts/tabular.rs b/examples/parts/tabular.rs index 1e210ca..c93ba08 100644 --- a/examples/parts/tabular.rs +++ b/examples/parts/tabular.rs @@ -1,7 +1,4 @@ -use serde::{ - Deserialize, - Serialize, -}; +use serde::{Deserialize, Serialize}; use serde_json::json; use toon_format::encode_default; diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..3bc837f --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "toon-format-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +toon-format = { path = ".." } +arbitrary = { version = "1", features = ["derive"] } +serde_json = "1" + +[[bin]] +name = "fuzz_decode" +path = "fuzz_targets/decode.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_encode" +path = "fuzz_targets/encode.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_targets/decode.rs b/fuzz/fuzz_targets/decode.rs new file mode 100644 index 0000000..ec4b24e --- /dev/null +++ b/fuzz/fuzz_targets/decode.rs @@ -0,0 +1,13 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use toon_format::{decode, DecodeOptions}; + +fuzz_target!(|data: &[u8]| { + if let Ok(s) = std::str::from_utf8(data) { + let _ = decode::(s, &DecodeOptions::default()); + + let strict = DecodeOptions::new().with_strict(true); + let _ = decode::(s, &strict); + } +}); diff --git a/fuzz/fuzz_targets/encode.rs b/fuzz/fuzz_targets/encode.rs new file mode 100644 index 0000000..003e429 --- /dev/null +++ b/fuzz/fuzz_targets/encode.rs @@ -0,0 +1,11 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use serde_json::Value; +use toon_format::{encode, EncodeOptions}; + +fuzz_target!(|data: &[u8]| { + if let Ok(json) = serde_json::from_slice::(data) { + let _ = encode(&json, &EncodeOptions::default()); + } +}); diff --git a/src/cli/main.rs b/src/cli/main.rs index ecc8e9f..5115956 100644 --- a/src/cli/main.rs +++ b/src/cli/main.rs @@ -1,36 +1,19 @@ use std::{ fs, - io::{ - self, - Read, - Write, - }, - path::{ - Path, - PathBuf, - }, + io::{self, Read, Write}, + path::{Path, PathBuf}, }; -use anyhow::{ - bail, - Context, - Result, -}; +use anyhow::{bail, Context, Result}; use clap::Parser; +#[cfg(feature = "cli-stats")] use comfy_table::Table; use serde::Serialize; +#[cfg(feature = "cli-stats")] use tiktoken_rs::cl100k_base; use toon_format::{ - decode, - encode, - types::{ - DecodeOptions, - Delimiter, - EncodeOptions, - Indent, - KeyFoldingMode, - PathExpansionMode, - }, + decode, encode, + types::{DecodeOptions, Delimiter, EncodeOptions, Indent, KeyFoldingMode, PathExpansionMode}, }; #[derive(Parser, Debug)] @@ -186,39 +169,8 @@ fn run_encode(cli: &Cli, input: &str) -> Result<()> { write_output(cli.output.clone(), &toon_str)?; - if cli.output.is_none() && !toon_str.ends_with('\n') { - io::stdout().write_all(b"\n")?; - } - if cli.stats { - let json_bytes = input.len(); - let toon_bytes = toon_str.len(); - let size_savings = 100.0 * (1.0 - (toon_bytes as f64 / json_bytes as f64)); - - let bpe = cl100k_base().context("Failed to load tokenizer")?; - let json_tokens = bpe.encode_with_special_tokens(input).len(); - let toon_tokens = bpe.encode_with_special_tokens(&toon_str).len(); - let token_savings = 100.0 * (1.0 - (toon_tokens as f64 / json_tokens as f64)); - - eprintln!("\nStats:"); - let mut table = Table::new(); - table.set_header(vec!["Metric", "JSON", "TOON", "Savings"]); - - table.add_row(vec![ - "Tokens", - &json_tokens.to_string(), - &toon_tokens.to_string(), - &format!("{token_savings:.2}%"), - ]); - - table.add_row(vec![ - "Size (bytes)", - &json_bytes.to_string(), - &toon_bytes.to_string(), - &format!("{size_savings:.2}%"), - ]); - - eprintln!("\n{table}\n"); + render_stats(input, &toon_str)?; } Ok(()) @@ -313,6 +265,11 @@ fn determine_operation(cli: &Cli) -> Result<(Operation, bool)> { } fn validate_flags(cli: &Cli, operation: &Operation) -> Result<()> { + #[cfg(not(feature = "cli-stats"))] + if cli.stats { + bail!("--stats requires the 'cli-stats' feature"); + } + match operation { Operation::Encode => { if cli.no_strict { @@ -382,6 +339,50 @@ fn main() -> Result<()> { Ok(()) } +#[cfg(feature = "cli-stats")] +fn render_stats(input: &str, toon_str: &str) -> Result<()> { + let json_bytes = input.len(); + let toon_bytes = toon_str.len(); + let size_savings = 100.0 * (1.0 - (toon_bytes as f64 / json_bytes as f64)); + + let bpe = cl100k_base().context("Failed to load tokenizer")?; + let json_tokens = bpe.encode_with_special_tokens(input).len(); + let toon_tokens = bpe.encode_with_special_tokens(toon_str).len(); + let token_savings = 100.0 * (1.0 - (toon_tokens as f64 / json_tokens as f64)); + + eprintln!("\nStats:"); + let mut table = Table::new(); + table.set_header(vec!["Metric", "JSON", "TOON", "Savings"]); + + table.add_row(vec![ + "Tokens", + &json_tokens.to_string(), + &toon_tokens.to_string(), + &format!("{token_savings:.2}%"), + ]); + + table.add_row(vec![ + "Size (bytes)", + &json_bytes.to_string(), + &toon_bytes.to_string(), + &format!("{size_savings:.2}%"), + ]); + + eprintln!("\n{table}\n"); + Ok(()) +} + +#[cfg(not(feature = "cli-stats"))] +fn render_stats(_input: &str, _toon_str: &str) -> Result<()> { + bail!("--stats requires the 'cli-stats' feature"); +} + +#[cfg(not(feature = "tui"))] +fn run_interactive() -> Result<()> { + bail!("Interactive mode requires the 'tui' feature"); +} + +#[cfg(feature = "tui")] fn run_interactive() -> Result<()> { toon_format::tui::run().context("Failed to run interactive TUI")?; Ok(()) diff --git a/src/constants.rs b/src/constants.rs index 2d9d444..9a57f2f 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -24,7 +24,7 @@ pub(crate) const QUOTED_KEY_MARKER: char = '\x00'; #[inline] pub fn is_structural_char(ch: char) -> bool { - STRUCTURAL_CHARS.contains(&ch) + matches!(ch, '[' | ']' | '{' | '}' | ':' | '-') } #[inline] diff --git a/src/decode/expansion.rs b/src/decode/expansion.rs index cd645bc..2162aa0 100644 --- a/src/decode/expansion.rs +++ b/src/decode/expansion.rs @@ -2,16 +2,10 @@ use indexmap::IndexMap; use crate::{ constants::QUOTED_KEY_MARKER, - types::{ - is_identifier_segment, - JsonValue as Value, - PathExpansionMode, - ToonError, - ToonResult, - }, + types::{is_identifier_segment, JsonValue as Value, PathExpansionMode, ToonError, ToonResult}, }; -pub fn should_expand_key(key: &str, mode: PathExpansionMode) -> Option> { +pub fn should_expand_key(key: &str, mode: PathExpansionMode) -> Option> { match mode { PathExpansionMode::Off => None, PathExpansionMode::Safe => { @@ -24,25 +18,31 @@ pub fn should_expand_key(key: &str, mode: PathExpansionMode) -> Option = key.split('.').map(String::from).collect(); + let mut segment_count = 0; + for segment in key.split('.') { + if segment.is_empty() || !is_identifier_segment(segment) { + return None; + } + segment_count += 1; + } - if segments.len() < 2 { + if segment_count < 2 { return None; } - // Only expand if all segments are valid identifiers (safety requirement) - if segments.iter().all(|s| is_identifier_segment(s)) { - Some(segments) - } else { - None + let mut segments = Vec::with_capacity(segment_count); + for segment in key.split('.') { + segments.push(segment); } + + Some(segments) } } } pub fn deep_merge_value( target: &mut IndexMap, - segments: &[String], + segments: &[&str], value: Value, strict: bool, ) -> ToonResult<()> { @@ -51,7 +51,7 @@ pub fn deep_merge_value( } if segments.len() == 1 { - let key = &segments[0]; + let key = segments[0]; // Check for conflicts at leaf level if let Some(existing) = target.get(key) { @@ -62,11 +62,11 @@ pub fn deep_merge_value( } } - target.insert(key.clone(), value); + target.insert(key.to_string(), value); return Ok(()); } - let first_key = &segments[0]; + let first_key = segments[0]; let remaining_segments = &segments[1..]; // Get or create nested object, handling type conflicts @@ -80,7 +80,6 @@ pub fn deep_merge_value( {existing_value:?}", ))); } - // Replace non-object with empty object in non-strict mode *existing_value = Value::Object(IndexMap::new()); match existing_value { Value::Object(obj) => obj, @@ -89,8 +88,8 @@ pub fn deep_merge_value( } } } else { - target.insert(first_key.clone(), Value::Object(IndexMap::new())); - match target.get_mut(first_key).unwrap() { + target.insert(first_key.to_string(), Value::Object(IndexMap::new())); + match target.get_mut(first_key).expect("key was just inserted") { Value::Object(obj) => obj, _ => unreachable!(), } @@ -105,33 +104,36 @@ pub fn expand_paths_in_object( mode: PathExpansionMode, strict: bool, ) -> ToonResult> { - let mut result = IndexMap::new(); + let mut result = IndexMap::with_capacity(obj.len()); for (key, mut value) in obj { - // Expand nested objects first (depth-first) - if let Value::Object(nested_obj) = value { - value = Value::Object(expand_paths_in_object(nested_obj, mode, strict)?); - } + // Expand nested structures (arrays/objects) first (depth-first) + value = expand_paths_recursive(value, mode, strict)?; - // Strip marker from quoted keys - let clean_key = if key.starts_with(QUOTED_KEY_MARKER) { - key.strip_prefix(QUOTED_KEY_MARKER).unwrap().to_string() - } else { - key.clone() - }; - - if let Some(segments) = should_expand_key(&key, mode) { - deep_merge_value(&mut result, &segments, value, strict)?; - } else { - // Check for conflicts with expanded keys - if let Some(existing) = result.get(&clean_key) { - if strict { - return Err(ToonError::DeserializationError(format!( - "Key '{clean_key}' conflicts with existing value: {existing:?}", - ))); + match should_expand_key(&key, mode) { + Some(segments) => { + deep_merge_value(&mut result, &segments, value, strict)?; + } + None => { + // Strip marker from quoted keys + let clean_key = if key.starts_with(QUOTED_KEY_MARKER) { + let mut cleaned = key; + cleaned.remove(0); + cleaned + } else { + key + }; + + // Check for conflicts with expanded keys + if let Some(existing) = result.get(clean_key.as_str()) { + if strict { + return Err(ToonError::DeserializationError(format!( + "Key '{clean_key}' conflicts with existing value: {existing:?}", + ))); + } } + result.insert(clean_key, value); } - result.insert(clean_key, value); } } @@ -149,11 +151,11 @@ pub fn expand_paths_recursive( Ok(Value::Object(expanded)) } Value::Array(arr) => { - let expanded: Result, _> = arr - .into_iter() - .map(|v| expand_paths_recursive(v, mode, strict)) - .collect(); - Ok(Value::Array(expanded?)) + let mut expanded = Vec::with_capacity(arr.len()); + for item in arr { + expanded.push(expand_paths_recursive(item, mode, strict)?); + } + Ok(Value::Array(expanded)) } _ => Ok(value), } @@ -175,11 +177,11 @@ mod tests { // Valid expansions assert_eq!( should_expand_key("a.b", PathExpansionMode::Safe), - Some(vec!["a".to_string(), "b".to_string()]) + Some(vec!["a", "b"]) ); assert_eq!( should_expand_key("a.b.c", PathExpansionMode::Safe), - Some(vec!["a".to_string(), "b".to_string(), "c".to_string()]) + Some(vec!["a", "b", "c"]) ); // No dots @@ -193,13 +195,7 @@ mod tests { #[test] fn test_deep_merge_simple() { let mut target = IndexMap::new(); - deep_merge_value( - &mut target, - &["a".to_string(), "b".to_string()], - Value::from(json!(1)), - true, - ) - .unwrap(); + deep_merge_value(&mut target, &["a", "b"], Value::from(json!(1)), true).unwrap(); let expected = json!({"a": {"b": 1}}); assert_eq!(Value::Object(target), Value::from(expected)); @@ -209,21 +205,9 @@ mod tests { fn test_deep_merge_multiple_paths() { let mut target = IndexMap::new(); - deep_merge_value( - &mut target, - &["a".to_string(), "b".to_string()], - Value::from(json!(1)), - true, - ) - .unwrap(); - - deep_merge_value( - &mut target, - &["a".to_string(), "c".to_string()], - Value::from(json!(2)), - true, - ) - .unwrap(); + deep_merge_value(&mut target, &["a", "b"], Value::from(json!(1)), true).unwrap(); + + deep_merge_value(&mut target, &["a", "c"], Value::from(json!(2)), true).unwrap(); let expected = json!({"a": {"b": 1, "c": 2}}); assert_eq!(Value::Object(target), Value::from(expected)); @@ -234,12 +218,7 @@ mod tests { let mut target = IndexMap::new(); target.insert("a".to_string(), Value::from(json!({"b": 1}))); - let result = deep_merge_value( - &mut target, - &["a".to_string(), "b".to_string()], - Value::from(json!(2)), - true, - ); + let result = deep_merge_value(&mut target, &["a", "b"], Value::from(json!(2)), true); assert!(result.is_err()); } @@ -249,13 +228,7 @@ mod tests { let mut target = IndexMap::new(); target.insert("a".to_string(), Value::from(json!({"b": 1}))); - deep_merge_value( - &mut target, - &["a".to_string(), "b".to_string()], - Value::from(json!(2)), - false, - ) - .unwrap(); + deep_merge_value(&mut target, &["a", "b"], Value::from(json!(2)), false).unwrap(); let expected = json!({"a": {"b": 2}}); assert_eq!(Value::Object(target), Value::from(expected)); diff --git a/src/decode/mod.rs b/src/decode/mod.rs index 574b3a7..420055c 100644 --- a/src/decode/mod.rs +++ b/src/decode/mod.rs @@ -6,10 +6,7 @@ pub mod validation; use serde_json::Value; -use crate::types::{ - DecodeOptions, - ToonResult, -}; +use crate::types::{DecodeOptions, ToonResult}; /// Decode a TOON string into any deserializable type. /// diff --git a/src/decode/parser.rs b/src/decode/parser.rs index 0f40084..80f23c4 100644 --- a/src/decode/parser.rs +++ b/src/decode/parser.rs @@ -1,30 +1,14 @@ -use serde_json::{ - Map, - Number, - Value, -}; +use serde_json::{Map, Number, Value}; +use std::sync::Arc; use crate::{ - constants::{ - KEYWORDS, - MAX_DEPTH, - QUOTED_KEY_MARKER, - }, + constants::{KEYWORDS, MAX_DEPTH, QUOTED_KEY_MARKER}, decode::{ - scanner::{ - Scanner, - Token, - }, + scanner::{Scanner, Token}, validation, }, - types::{ - DecodeOptions, - Delimiter, - ErrorContext, - ToonError, - ToonResult, - }, - utils::validation::validate_depth, + types::{DecodeOptions, Delimiter, ErrorContext, PathExpansionMode, ToonError, ToonResult}, + utils::{is_valid_unquoted_key, validation::validate_depth}, }; /// Context for parsing arrays to determine correct indentation depth. @@ -44,26 +28,31 @@ enum ArrayParseContext { /// Parser that builds JSON values from a sequence of tokens. #[allow(unused)] -pub struct Parser<'a> { +pub struct Parser { scanner: Scanner, current_token: Token, options: DecodeOptions, delimiter: Option, - input: &'a str, + delimiter_stack: Vec>, + input: Arc, } -impl<'a> Parser<'a> { +impl Parser { /// Create a new parser with the given input and options. - pub fn new(input: &'a str, options: DecodeOptions) -> ToonResult { - let mut scanner = Scanner::new(input); + pub fn new(input: &str, options: DecodeOptions) -> ToonResult { + let input: Arc = Arc::from(input); + let mut scanner = Scanner::from_shared_input(input.clone()); let chosen_delim = options.delimiter; scanner.set_active_delimiter(chosen_delim); + scanner.set_coerce_types(options.coerce_types); + scanner.configure_indentation(options.strict, options.indent.get_spaces()); let current_token = scanner.scan_token()?; Ok(Self { scanner, current_token, delimiter: chosen_delim, + delimiter_stack: Vec::new(), options, input, }) @@ -103,6 +92,93 @@ impl<'a> Parser<'a> { Ok(()) } + fn push_delimiter(&mut self, delimiter: Option) { + self.delimiter_stack.push(self.delimiter); + self.delimiter = delimiter; + self.scanner.set_active_delimiter(delimiter); + } + + fn pop_delimiter(&mut self) { + if let Some(previous) = self.delimiter_stack.pop() { + self.delimiter = previous; + self.scanner.set_active_delimiter(previous); + if let (Some(delim), Token::String(value, was_quoted)) = (previous, &self.current_token) + { + if !*was_quoted && value.len() == 1 && value.starts_with(delim.as_char()) { + self.current_token = Token::Delimiter(delim); + } + } + } + } + + fn format_key(&self, key: &str, was_quoted: bool) -> String { + if was_quoted && key.contains('.') { + format!("{QUOTED_KEY_MARKER}{key}") + } else { + key.to_string() + } + } + + fn validate_unquoted_key(&self, key: &str, was_quoted: bool) -> ToonResult<()> { + if self.options.strict && !was_quoted { + if self.options.expand_paths != PathExpansionMode::Off && key.contains('.') { + return Ok(()); + } + + if !is_valid_unquoted_key(key) { + return Err(self + .parse_error_with_context(format!("Invalid unquoted key: '{key}'")) + .with_suggestion("Quote the key to use special characters")); + } + } + Ok(()) + } + + fn validate_unquoted_string(&self, value: &str, was_quoted: bool) -> ToonResult<()> { + if self.options.strict && !was_quoted && value.contains('\t') { + return Err(self + .parse_error_with_context("Unquoted tab characters are not allowed in strict mode") + .with_suggestion("Quote the value to include tabs")); + } + Ok(()) + } + + fn is_key_token(&self) -> bool { + matches!( + self.current_token, + Token::String(_, _) | Token::Bool(_) | Token::Null + ) + } + + fn key_from_token(&self) -> Option<(String, bool)> { + match &self.current_token { + Token::String(s, was_quoted) => Some((self.format_key(s, *was_quoted), *was_quoted)), + Token::Bool(b) => Some(( + if *b { + KEYWORDS[1].to_string() + } else { + KEYWORDS[2].to_string() + }, + false, + )), + Token::Null => Some((KEYWORDS[0].to_string(), false)), + _ => None, + } + } + + fn find_unexpected_delimiter( + &self, + field: &str, + expected: Option, + ) -> Option { + let expected = expected?; + let delimiters = [Delimiter::Comma, Delimiter::Pipe, Delimiter::Tab]; + + delimiters + .into_iter() + .find(|delim| *delim != expected && field.contains(delim.as_char())) + } + fn parse_value(&mut self) -> ToonResult { self.parse_value_with_depth(0) } @@ -120,7 +196,7 @@ impl<'a> Parser<'a> { if next_char_is_colon { let key = KEYWORDS[0].to_string(); self.advance()?; - self.parse_object_with_initial_key(key, depth) + self.parse_object_with_initial_key(key, false, depth) } else { self.advance()?; Ok(Value::Null) @@ -135,7 +211,7 @@ impl<'a> Parser<'a> { KEYWORDS[2].to_string() }; self.advance()?; - self.parse_object_with_initial_key(key, depth) + self.parse_object_with_initial_key(key, false, depth) } else { let val = *b; self.advance()?; @@ -147,7 +223,7 @@ impl<'a> Parser<'a> { if next_char_is_colon { let key = i.to_string(); self.advance()?; - self.parse_object_with_initial_key(key, depth) + self.parse_object_with_initial_key(key, false, depth) } else { let val = *i; self.advance()?; @@ -159,7 +235,7 @@ impl<'a> Parser<'a> { if next_char_is_colon { let key = n.to_string(); self.advance()?; - self.parse_object_with_initial_key(key, depth) + self.parse_object_with_initial_key(key, false, depth) } else { let val = *n; self.advance()?; @@ -175,13 +251,15 @@ impl<'a> Parser<'a> { } } } - Token::String(s, _) => { + Token::String(s, was_quoted) => { + let key_was_quoted = *was_quoted; let first = s.clone(); self.advance()?; match &self.current_token { Token::Colon | Token::LeftBracket => { - self.parse_object_with_initial_key(first, depth) + let key = self.format_key(&first, key_was_quoted); + self.parse_object_with_initial_key(key, key_was_quoted, depth) } _ => { // Strings on new indented lines could be missing colons (keys) or values @@ -206,6 +284,7 @@ impl<'a> Parser<'a> { accumulated.push_str(next); self.advance()?; } + self.validate_unquoted_string(&accumulated, key_was_quoted)?; Ok(Value::String(accumulated)) } } @@ -232,7 +311,7 @@ impl<'a> Parser<'a> { break; } - let current_indent = self.scanner.get_last_line_indent(); + let current_indent = self.normalize_indent(self.scanner.get_last_line_indent()); if self.options.strict { self.validate_indentation(current_indent)?; @@ -247,17 +326,9 @@ impl<'a> Parser<'a> { base_indent = Some(current_indent); } - let key = match &self.current_token { - Token::String(s, was_quoted) => { - // Mark quoted keys containing dots with a special prefix - // so path expansion can skip them - if *was_quoted && s.contains('.') { - format!("{QUOTED_KEY_MARKER}{s}") - } else { - s.clone() - } - } - _ => { + let (key, was_quoted) = match self.key_from_token() { + Some(key) => key, + None => { return Err(self .parse_error_with_context(format!( "Expected key, found {:?}", @@ -266,6 +337,7 @@ impl<'a> Parser<'a> { .with_suggestion("Object keys must be strings")); } }; + self.validate_unquoted_key(&key, was_quoted)?; self.advance()?; let value = if matches!(self.current_token, Token::LeftBracket) { @@ -289,7 +361,12 @@ impl<'a> Parser<'a> { Ok(Value::Object(obj)) } - fn parse_object_with_initial_key(&mut self, key: String, depth: usize) -> ToonResult { + fn parse_object_with_initial_key( + &mut self, + key: String, + key_was_quoted: bool, + depth: usize, + ) -> ToonResult { validate_depth(depth, MAX_DEPTH)?; let mut obj = Map::new(); @@ -297,10 +374,12 @@ impl<'a> Parser<'a> { // Validate indentation for the initial key if in strict mode if self.options.strict { - let current_indent = self.scanner.get_last_line_indent(); + let current_indent = self.normalize_indent(self.scanner.get_last_line_indent()); self.validate_indentation(current_indent)?; } + self.validate_unquoted_key(&key, key_was_quoted)?; + if matches!(self.current_token, Token::LeftBracket) { let value = self.parse_array(depth)?; obj.insert(key, value); @@ -332,7 +411,7 @@ impl<'a> Parser<'a> { continue; } - let next_indent = self.scanner.get_last_line_indent(); + let next_indent = self.normalize_indent(self.scanner.get_last_line_indent()); // Check if the next line is at the right indentation level let should_continue = if let Some(expected) = base_indent { @@ -352,7 +431,7 @@ impl<'a> Parser<'a> { break; } - if !matches!(self.current_token, Token::String(_, _)) { + if !self.is_key_token() { break; } @@ -360,7 +439,7 @@ impl<'a> Parser<'a> { break; } - let current_indent = self.scanner.get_last_line_indent(); + let current_indent = self.normalize_indent(self.scanner.get_last_line_indent()); if let Some(expected) = base_indent { if current_indent != expected { @@ -382,18 +461,11 @@ impl<'a> Parser<'a> { base_indent = Some(current_indent); } - let key = match &self.current_token { - Token::String(s, was_quoted) => { - // Mark quoted keys containing dots with a special prefix - // so path expansion can skip them - if *was_quoted && s.contains('.') { - format!("{QUOTED_KEY_MARKER}{s}") - } else { - s.clone() - } - } - _ => break, + let (key, was_quoted) = match self.key_from_token() { + Some(key) => key, + None => break, }; + self.validate_unquoted_key(&key, was_quoted)?; self.advance()?; let value = if matches!(self.current_token, Token::LeftBracket) { @@ -419,6 +491,7 @@ impl<'a> Parser<'a> { let has_children = if matches!(self.current_token, Token::Newline) { let current_depth_indent = self.options.indent.get_spaces() * (depth + 1); let next_indent = self.scanner.count_leading_spaces(); + let next_indent = self.normalize_indent(next_indent); next_indent >= current_depth_indent } else { false @@ -433,12 +506,15 @@ impl<'a> Parser<'a> { self.parse_value_with_depth(depth + 1) } else { // Check if there's more content after the current token - let (rest, had_space) = self.scanner.read_rest_of_line_with_space_info(); + let (rest, leading_space) = self.scanner.read_rest_of_line_with_space_count(); let result = if rest.is_empty() { // Single token - convert directly to avoid redundant parsing match &self.current_token { - Token::String(s, _) => Ok(Value::String(s.clone())), + Token::String(s, was_quoted) => { + self.validate_unquoted_string(s, *was_quoted)?; + Ok(Value::String(s.clone())) + } Token::Integer(i) => Ok(serde_json::Number::from(*i).into()), Token::Number(n) => { let val = *n; @@ -458,13 +534,22 @@ impl<'a> Parser<'a> { } } else { // Multi-token value - reconstruct and re-parse as complete string - let mut value_str = String::new(); + let token_len = match &self.current_token { + Token::String(s, was_quoted) => s.len() + if *was_quoted { 2 } else { 0 }, + Token::Integer(_) => 20, + Token::Number(_) => 32, + Token::Bool(true) => 4, + Token::Bool(false) => 5, + Token::Null => 4, + _ => 0, + }; + let mut value_str = String::with_capacity(token_len + leading_space + rest.len()); match &self.current_token { Token::String(s, true) => { // Quoted strings need quotes preserved for re-parsing value_str.push('"'); - value_str.push_str(&crate::utils::escape_string(s)); + crate::utils::escape_string_into(&mut value_str, s); value_str.push('"'); } Token::String(s, false) => value_str.push_str(s), @@ -478,14 +563,22 @@ impl<'a> Parser<'a> { } // Only add space if there was whitespace in the original input - if had_space { - value_str.push(' '); + if !rest.is_empty() && leading_space > 0 { + value_str.extend(std::iter::repeat_n(' ', leading_space)); } value_str.push_str(&rest); let token = self.scanner.parse_value_string(&value_str)?; match token { - Token::String(s, _) => Ok(Value::String(s)), + Token::String(s, was_quoted) => { + if self.options.strict && !was_quoted && value_str.contains('\t') { + return Err(self.parse_error_with_context( + "Unquoted tab characters are not allowed in strict mode", + )); + } + self.validate_unquoted_string(&s, was_quoted)?; + Ok(Value::String(s)) + } Token::Integer(i) => Ok(serde_json::Number::from(i).into()), Token::Number(n) => { if n.is_finite() && n.fract() == 0.0 && n.abs() <= i64::MAX as f64 { @@ -519,9 +612,7 @@ impl<'a> Parser<'a> { self.parse_array(depth) } - fn parse_array_header( - &mut self, - ) -> ToonResult<(usize, Option, Option>)> { + fn parse_array_header(&mut self) -> ToonResult<(usize, Option, bool)> { if !matches!(self.current_token, Token::LeftBracket) { return Err(self.parse_error_with_context("Expected '['")); } @@ -529,27 +620,40 @@ impl<'a> Parser<'a> { // Parse array length (plain integer only) // Supports formats: [N], [N|], [N\t] (no # marker) - let length = if let Token::Integer(n) = &self.current_token { - *n as usize - } else if let Token::String(s, _) = &self.current_token { - // Check if string starts with # - this marker is not supported - if s.starts_with('#') { - return Err(self - .parse_error_with_context( - "Length marker '#' is not supported. Use [N] format instead of [#N]", - ) - .with_suggestion("Remove the '#' prefix from the array length")); + let length = match &self.current_token { + Token::Integer(n) => { + validation::validate_array_length_non_negative(*n)?; + *n as usize + } + Token::Number(_) => { + return Err(self.parse_error_with_context("Array length must be an integer")); } + Token::String(s, _) => { + // Check if string starts with # - this marker is not supported + if s.starts_with('#') { + return Err(self + .parse_error_with_context( + "Length marker '#' is not supported. Use [N] format instead of [#N]", + ) + .with_suggestion("Remove the '#' prefix from the array length")); + } - // Plain string that's a number: "3" - s.parse::().map_err(|_| { - self.parse_error_with_context(format!("Expected array length, found: {s}")) - })? - } else { - return Err(self.parse_error_with_context(format!( - "Expected array length, found {:?}", - self.current_token - ))); + if s.contains('.') || s.contains('e') || s.contains('E') { + return Err(self.parse_error_with_context("Array length must be an integer")); + } + + let parsed = s.parse::().map_err(|_| { + self.parse_error_with_context(format!("Expected array length, found: {s}")) + })?; + validation::validate_array_length_non_negative(parsed)?; + parsed as usize + } + _ => { + return Err(self.parse_error_with_context(format!( + "Expected array length, found {:?}", + self.current_token + ))); + } }; self.advance()?; @@ -576,11 +680,6 @@ impl<'a> Parser<'a> { _ => None, }; - // Default to comma if no delimiter specified - let active_delim = detected_delim.or(Some(Delimiter::Comma)); - - self.scanner.set_active_delimiter(active_delim); - if !matches!(self.current_token, Token::RightBracket) { return Err(self.parse_error_with_context(format!( "Expected ']', found {:?}", @@ -589,51 +688,114 @@ impl<'a> Parser<'a> { } self.advance()?; - let fields = if matches!(self.current_token, Token::LeftBrace) { - self.advance()?; - let mut fields = Vec::new(); + let has_fields = matches!(self.current_token, Token::LeftBrace); - loop { - match &self.current_token { - Token::String(s, _) => { - fields.push(s.clone()); - self.advance()?; + Ok((length, detected_delim, has_fields)) + } - if matches!(self.current_token, Token::RightBrace) { - break; - } + fn parse_field_list(&mut self, expected_delim: Option) -> ToonResult> { + if !matches!(self.current_token, Token::LeftBrace) { + return Err(self.parse_error_with_context("Expected '{' for field list")); + } + self.advance()?; - if matches!(self.current_token, Token::Delimiter(_)) { - self.advance()?; - } else { + let mut fields = Vec::new(); + let mut field_list_delim = None; + + loop { + match &self.current_token { + Token::String(s, was_quoted) => { + if self.options.strict { + if let Some(unexpected) = self.find_unexpected_delimiter(s, expected_delim) + { return Err(self.parse_error_with_context(format!( - "Expected delimiter or '}}', found {:?}", - self.current_token + "Field list delimiter {unexpected} does not match expected {}", + expected_delim + .map(|delim| delim.to_string()) + .unwrap_or_else(|| "none".to_string()) ))); } + self.validate_unquoted_key(s, *was_quoted)?; } - Token::RightBrace => break, - _ => { + + fields.push(self.format_key(s, *was_quoted)); + self.advance()?; + + if matches!(self.current_token, Token::RightBrace) { + break; + } + + if let Token::Delimiter(delim) = &self.current_token { + if self.options.strict { + validation::validate_delimiter_consistency( + Some(*delim), + expected_delim, + )?; + } + if field_list_delim.is_none() { + field_list_delim = Some(*delim); + } + self.advance()?; + } else { return Err(self.parse_error_with_context(format!( - "Expected field name, found {:?}", + "Expected delimiter or '}}', found {:?}", self.current_token - ))) + ))); } } - } + Token::Bool(_) | Token::Null => { + let (field, was_quoted) = match self.key_from_token() { + Some(key) => key, + None => { + return Err(self.parse_error_with_context(format!( + "Expected field name, found {:?}", + self.current_token + ))) + } + }; + self.validate_unquoted_key(&field, was_quoted)?; + fields.push(field); + self.advance()?; - self.advance()?; - Some(fields) - } else { - None - }; + if matches!(self.current_token, Token::RightBrace) { + break; + } - if !matches!(self.current_token, Token::Colon) { - return Err(self.parse_error_with_context("Expected ':' after array header")); + if let Token::Delimiter(delim) = &self.current_token { + if self.options.strict { + validation::validate_delimiter_consistency( + Some(*delim), + expected_delim, + )?; + } + if field_list_delim.is_none() { + field_list_delim = Some(*delim); + } + self.advance()?; + } else { + return Err(self.parse_error_with_context(format!( + "Expected delimiter or '}}', found {:?}", + self.current_token + ))); + } + } + Token::RightBrace => break, + _ => { + return Err(self.parse_error_with_context(format!( + "Expected field name, found {:?}", + self.current_token + ))) + } + } } + self.advance()?; + validation::validate_field_list(&fields)?; + if self.options.strict { + validation::validate_delimiter_consistency(field_list_delim, expected_delim)?; + } - Ok((length, detected_delim, fields)) + Ok(fields) } fn parse_array(&mut self, depth: usize) -> ToonResult { @@ -647,20 +809,54 @@ impl<'a> Parser<'a> { ) -> ToonResult { validate_depth(depth, MAX_DEPTH)?; - let (length, _detected_delim, fields) = self.parse_array_header()?; + let (length, detected_delim, has_fields) = self.parse_array_header()?; - if let Some(fields) = fields { - validation::validate_field_list(&fields)?; - self.parse_tabular_array(length, &fields, depth, context) - } else { - // Non-tabular arrays as first field of list items require depth adjustment - // (items at depth +2 relative to hyphen, not the usual +1) - let adjusted_depth = match context { - ArrayParseContext::Normal => depth, - ArrayParseContext::ListItemFirstField => depth + 1, + if let (Some(detected), Some(expected)) = (detected_delim, self.options.delimiter) { + if detected != expected { + return Err(self.parse_error_with_context(format!( + "Detected delimiter {detected} but expected {expected}" + ))); + } + } + + let active_delim = detected_delim + .or(self.options.delimiter) + .or(Some(Delimiter::Comma)); + + let mut pushed = false; + let result = (|| -> ToonResult { + self.push_delimiter(active_delim); + pushed = true; + + let fields = if has_fields { + Some(self.parse_field_list(active_delim)?) + } else { + None }; - self.parse_regular_array(length, adjusted_depth) + + if !matches!(self.current_token, Token::Colon) { + return Err(self.parse_error_with_context("Expected ':' after array header")); + } + self.advance()?; + + if let Some(fields) = fields { + self.parse_tabular_array(length, &fields, depth, context) + } else { + // Non-tabular arrays as first field of list items require depth adjustment + // (items at depth +2 relative to hyphen, not the usual +1) + let adjusted_depth = match context { + ArrayParseContext::Normal => depth, + ArrayParseContext::ListItemFirstField => depth + 1, + }; + self.parse_regular_array(length, adjusted_depth) + } + })(); + + if pushed { + self.pop_delimiter(); } + + result } fn parse_tabular_array( @@ -670,7 +866,7 @@ impl<'a> Parser<'a> { depth: usize, context: ArrayParseContext, ) -> ToonResult { - let mut rows = Vec::new(); + let mut rows = Vec::with_capacity(length); if !matches!(self.current_token, Token::Newline) { return Err(self @@ -679,7 +875,17 @@ impl<'a> Parser<'a> { } self.skip_newlines()?; - for row_index in 0..length { + // Tabular arrays as first field of list-item objects require rows at depth +2 + // (relative to hyphen), while normal tabular arrays use depth +1 + let row_depth_offset = match context { + ArrayParseContext::Normal => 1, + ArrayParseContext::ListItemFirstField => 2, + }; + let indent_size = self.options.indent.get_spaces(); + let expected_indent = indent_size * (depth + row_depth_offset); + + let mut row_index = 0; + loop { if matches!(self.current_token, Token::Eof) { if self.options.strict { return Err(self.parse_error_with_context(format!( @@ -691,15 +897,7 @@ impl<'a> Parser<'a> { break; } - let current_indent = self.scanner.get_last_line_indent(); - - // Tabular arrays as first field of list-item objects require rows at depth +2 - // (relative to hyphen), while normal tabular arrays use depth +1 - let row_depth_offset = match context { - ArrayParseContext::Normal => 1, - ArrayParseContext::ListItemFirstField => 2, - }; - let expected_indent = self.options.indent.get_spaces() * (depth + row_depth_offset); + let current_indent = self.normalize_indent(self.scanner.get_last_line_indent()); if self.options.strict { self.validate_indentation(current_indent)?; @@ -710,9 +908,14 @@ impl<'a> Parser<'a> { found {current_indent}" ))); } + } else { + let is_key_value = self.is_key_token() && matches!(self.scanner.peek(), Some(':')); + if current_indent != expected_indent || is_key_value { + break; + } } - let mut row = Map::new(); + let mut row = Map::with_capacity(fields.len()); for (field_index, field) in fields.iter().enumerate() { // Skip delimiter before each field except the first @@ -794,6 +997,7 @@ impl<'a> Parser<'a> { } rows.push(Value::Object(row)); + row_index += 1; if matches!(self.current_token, Token::Eof) { break; @@ -810,60 +1014,73 @@ impl<'a> Parser<'a> { } else { return Err(self.parse_error_with_context(format!( "Expected newline after tabular row {}", - row_index + 1 + row_index ))); } } - if row_index + 1 < length { - self.advance()?; - if self.options.strict && matches!(self.current_token, Token::Newline) { - return Err(self.parse_error_with_context( - "Blank lines are not allowed inside tabular arrays in strict mode", - )); - } + if self.options.strict { + if row_index < length { + self.advance()?; + if matches!(self.current_token, Token::Newline) { + return Err(self.parse_error_with_context( + "Blank lines are not allowed inside tabular arrays in strict mode", + )); + } - self.skip_newlines()?; - } else if matches!(self.current_token, Token::Newline) { - // After the last row, check if there are extra rows - self.advance()?; - self.skip_newlines()?; + self.skip_newlines()?; + } else if matches!(self.current_token, Token::Newline) { + // After the last row, check if there are extra rows + self.advance()?; + self.skip_newlines()?; - let expected_indent = self.options.indent.get_spaces() * (depth + 1); - let actual_indent = self.scanner.get_last_line_indent(); + let actual_indent = self.normalize_indent(self.scanner.get_last_line_indent()); - // If something at the same indent level, it might be a new row (error) - // unless it's a key-value pair (which belongs to parent) - if actual_indent == expected_indent && !matches!(self.current_token, Token::Eof) { - let is_key_value = matches!(self.current_token, Token::String(_, _)) - && matches!(self.scanner.peek(), Some(':')); + // If something at the same indent level, it might be a new row (error) + // unless it's a key-value pair (which belongs to parent) + if actual_indent == expected_indent && !matches!(self.current_token, Token::Eof) + { + let is_key_value = + self.is_key_token() && matches!(self.scanner.peek(), Some(':')); - if !is_key_value { - return Err(self.parse_error_with_context(format!( - "Array length mismatch: expected {length} rows, but more rows found", - ))); + if !is_key_value { + return Err(self.parse_error_with_context(format!( + "Array length mismatch: expected {length} rows, but more rows found", + ))); + } } } + + if row_index >= length { + break; + } + } else if matches!(self.current_token, Token::Newline) { + self.advance()?; + self.skip_newlines()?; } } - validation::validate_array_length(length, rows.len())?; + if self.options.strict { + validation::validate_array_length(length, rows.len())?; + } Ok(Value::Array(rows)) } fn parse_regular_array(&mut self, length: usize, depth: usize) -> ToonResult { - let mut items = Vec::new(); + let mut items = Vec::with_capacity(length); + let indent_size = self.options.indent.get_spaces(); match &self.current_token { Token::Newline => { self.skip_newlines()?; - let expected_indent = self.options.indent.get_spaces() * (depth + 1); + let expected_indent = indent_size * (depth + 1); - for i in 0..length { - let current_indent = self.scanner.get_last_line_indent(); - if self.options.strict { + if self.options.strict { + for i in 0..length { + let current_indent = + self.normalize_indent(self.scanner.get_last_line_indent()); self.validate_indentation(current_indent)?; if current_indent != expected_indent { @@ -872,227 +1089,443 @@ impl<'a> Parser<'a> { spaces, found {current_indent}" ))); } - } - if !matches!(self.current_token, Token::Dash) { - return Err(self - .parse_error_with_context(format!( - "Expected '-' for list item, found {:?}", - self.current_token - )) - .with_suggestion(format!( - "List arrays need '-' prefix for each item (item {} of {})", - i + 1, - length - ))); - } - self.advance()?; - - let value = if matches!(self.current_token, Token::Newline | Token::Eof) { - Value::Object(Map::new()) - } else if matches!(self.current_token, Token::LeftBracket) { - self.parse_array(depth + 1)? - } else if let Token::String(s, _) = &self.current_token { - let key = s.clone(); + if !matches!(self.current_token, Token::Dash) { + return Err(self + .parse_error_with_context(format!( + "Expected '-' for list item, found {:?}", + self.current_token + )) + .with_suggestion(format!( + "List arrays need '-' prefix for each item (item {} of {})", + i + 1, + length + ))); + } self.advance()?; - if matches!(self.current_token, Token::Colon | Token::LeftBracket) { - // This is an object: key followed by colon or array bracket - // First field of list-item object may be an array requiring special - // indentation - let first_value = if matches!(self.current_token, Token::LeftBracket) { - // Array directly after key (e.g., "- key[N]:") - // Use ListItemFirstField context to apply correct indentation - self.parse_array_with_context( - depth + 1, - ArrayParseContext::ListItemFirstField, - )? - } else { - self.advance()?; - // Handle nested arrays: "key: [2]: ..." - if matches!(self.current_token, Token::LeftBracket) { - // Array after colon - not directly on hyphen line, use normal - // context - self.parse_array(depth + 2)? - } else { - self.parse_field_value(depth + 2)? + let value = if matches!(self.current_token, Token::Newline | Token::Eof) { + Value::Object(Map::new()) + } else if matches!(self.current_token, Token::LeftBracket) { + self.parse_array(depth + 1)? + } else if self.is_key_token() { + let (key, key_was_quoted) = match self.key_from_token() { + Some(key) => key, + None => { + return Err(self.parse_error_with_context(format!( + "Expected key, found {:?}", + self.current_token + ))); } }; + self.validate_unquoted_key(&key, key_was_quoted)?; + self.advance()?; + + if matches!(self.current_token, Token::Colon | Token::LeftBracket) { + // This is an object: key followed by colon or array bracket + // First field of list-item object may be an array requiring special + // indentation + let first_value = + if matches!(self.current_token, Token::LeftBracket) { + // Array directly after key (e.g., "- key[N]:") + // Use ListItemFirstField context to apply correct indentation + self.parse_array_with_context( + depth + 1, + ArrayParseContext::ListItemFirstField, + )? + } else { + self.advance()?; + // Handle nested arrays: "key: [2]: ..." + if matches!(self.current_token, Token::LeftBracket) { + // Array after colon - not directly on hyphen line, use normal + // context + self.parse_array(depth + 2)? + } else { + self.parse_field_value(depth + 2)? + } + }; - let mut obj = Map::new(); - obj.insert(key, first_value); + let mut obj = Map::new(); + obj.insert(key, first_value); - let field_indent = self.options.indent.get_spaces() * (depth + 2); + let field_indent = indent_size * (depth + 2); - // Check if there are more fields at the same indentation level - let should_parse_more_fields = - if matches!(self.current_token, Token::Newline) { - let next_indent = self.scanner.count_leading_spaces(); + // Check if there are more fields at the same indentation level + let should_parse_more_fields = + if matches!(self.current_token, Token::Newline) { + let next_indent = self.scanner.count_leading_spaces(); + let next_indent = self.normalize_indent(next_indent); - if next_indent < field_indent { - false - } else { - self.advance()?; + if next_indent < field_indent { + false + } else { + self.advance()?; - if !self.options.strict { - self.skip_newlines()?; + if !self.options.strict { + self.skip_newlines()?; + } + true } - true - } - } else if matches!(self.current_token, Token::String(_, _)) { - // When already positioned at a field key, check its indent - let current_indent = self.scanner.get_last_line_indent(); - current_indent == field_indent - } else { - false - }; - - // Parse additional fields if they're at the right indentation - if should_parse_more_fields { - while !matches!(self.current_token, Token::Eof) { - let current_indent = self.scanner.get_last_line_indent(); - - if current_indent < field_indent { - break; - } + } else if self.is_key_token() { + // When already positioned at a field key, check its indent + let current_indent = self + .normalize_indent(self.scanner.get_last_line_indent()); + current_indent == field_indent + } else { + false + }; - if current_indent != field_indent && self.options.strict { - break; - } + // Parse additional fields if they're at the right indentation + if should_parse_more_fields { + while !matches!(self.current_token, Token::Eof) { + let current_indent = self + .normalize_indent(self.scanner.get_last_line_indent()); - // Stop if we hit the next list item - if matches!(self.current_token, Token::Dash) { - break; - } + if current_indent != field_indent { + break; + } - let field_key = match &self.current_token { - Token::String(s, _) => s.clone(), - _ => break, - }; - self.advance()?; + // Stop if we hit the next list item + if matches!(self.current_token, Token::Dash) { + break; + } - let field_value = - if matches!(self.current_token, Token::LeftBracket) { - self.parse_array(depth + 2)? - } else if matches!(self.current_token, Token::Colon) { - self.advance()?; + let (field_key, field_key_was_quoted) = + match self.key_from_token() { + Some(key) => key, + None => break, + }; + self.validate_unquoted_key( + &field_key, + field_key_was_quoted, + )?; + self.advance()?; + + let field_value = if matches!(self.current_token, Token::LeftBracket) { self.parse_array(depth + 2)? + } else if matches!(self.current_token, Token::Colon) { + self.advance()?; + if matches!(self.current_token, Token::LeftBracket) + { + self.parse_array(depth + 2)? + } else { + self.parse_field_value(depth + 2)? + } } else { - self.parse_field_value(depth + 2)? + break; + }; + + obj.insert(field_key, field_value); + + if matches!(self.current_token, Token::Newline) { + let next_indent = self.scanner.count_leading_spaces(); + let next_indent = self.normalize_indent(next_indent); + if next_indent < field_indent { + break; + } + self.advance()?; + if !self.options.strict { + self.skip_newlines()?; } } else { break; - }; + } + } + } + + Value::Object(obj) + } else if matches!(self.current_token, Token::LeftBracket) { + // Array as object value: "key[2]: ..." + let array_value = self.parse_array(depth + 1)?; + let mut obj = Map::new(); + obj.insert(key, array_value); + Value::Object(obj) + } else { + // Plain string value + Value::String(key) + } + } else { + self.parse_primitive()? + }; + + items.push(value); + + if items.len() < length { + if matches!(self.current_token, Token::Newline) { + self.advance()?; + + if self.options.strict + && matches!(self.current_token, Token::Newline) + { + return Err(self.parse_error_with_context( + "Blank lines are not allowed inside list arrays in strict mode", + )); + } + + self.skip_newlines()?; + } else if !matches!(self.current_token, Token::Dash) { + return Err(self.parse_error_with_context(format!( + "Expected newline or next list item after list item {}", + i + 1 + ))); + } + } else if matches!(self.current_token, Token::Newline) { + // After the last item, check for extra items + self.advance()?; + self.skip_newlines()?; - obj.insert(field_key, field_value); + let list_indent = indent_size * (depth + 1); + let actual_indent = + self.normalize_indent(self.scanner.get_last_line_indent()); + // If we see another dash at the same indent, there are too many items + if actual_indent == list_indent + && matches!(self.current_token, Token::Dash) + { + return Err(self.parse_error_with_context(format!( + "Array length mismatch: expected {length} items, but more items \ + found", + ))); + } + } + } + } else { + loop { + if matches!(self.current_token, Token::Eof) { + break; + } + let current_indent = + self.normalize_indent(self.scanner.get_last_line_indent()); + if current_indent != expected_indent { + break; + } + + if !matches!(self.current_token, Token::Dash) { + break; + } + self.advance()?; + + let value = if matches!(self.current_token, Token::Newline | Token::Eof) { + Value::Object(Map::new()) + } else if matches!(self.current_token, Token::LeftBracket) { + self.parse_array(depth + 1)? + } else if self.is_key_token() { + let (key, key_was_quoted) = match self.key_from_token() { + Some(key) => key, + None => { + return Err(self.parse_error_with_context(format!( + "Expected key, found {:?}", + self.current_token + ))); + } + }; + self.validate_unquoted_key(&key, key_was_quoted)?; + self.advance()?; + + if matches!(self.current_token, Token::Colon | Token::LeftBracket) { + let first_value = + if matches!(self.current_token, Token::LeftBracket) { + self.parse_array_with_context( + depth + 1, + ArrayParseContext::ListItemFirstField, + )? + } else { + self.advance()?; + if matches!(self.current_token, Token::LeftBracket) { + self.parse_array(depth + 2)? + } else { + self.parse_field_value(depth + 2)? + } + }; + + let mut obj = Map::new(); + obj.insert(key, first_value); + + let field_indent = indent_size * (depth + 2); + + let should_parse_more_fields = if matches!(self.current_token, Token::Newline) { let next_indent = self.scanner.count_leading_spaces(); + let next_indent = self.normalize_indent(next_indent); + if next_indent < field_indent { + false + } else { + self.advance()?; + self.skip_newlines()?; + true + } + } else if self.is_key_token() { + let current_indent = self + .normalize_indent(self.scanner.get_last_line_indent()); + current_indent == field_indent + } else { + false + }; + + if should_parse_more_fields { + while !matches!(self.current_token, Token::Eof) { + let current_indent = self + .normalize_indent(self.scanner.get_last_line_indent()); + if current_indent != field_indent { + break; + } + + if matches!(self.current_token, Token::Dash) { break; } + + let (field_key, field_key_was_quoted) = + match self.key_from_token() { + Some(key) => key, + None => break, + }; + self.validate_unquoted_key( + &field_key, + field_key_was_quoted, + )?; self.advance()?; - if !self.options.strict { + + let field_value = + if matches!(self.current_token, Token::LeftBracket) { + self.parse_array(depth + 2)? + } else if matches!(self.current_token, Token::Colon) { + self.advance()?; + if matches!(self.current_token, Token::LeftBracket) + { + self.parse_array(depth + 2)? + } else { + self.parse_field_value(depth + 2)? + } + } else { + break; + }; + + obj.insert(field_key, field_value); + + if matches!(self.current_token, Token::Newline) { + let next_indent = self.scanner.count_leading_spaces(); + let next_indent = self.normalize_indent(next_indent); + if next_indent < field_indent { + break; + } + self.advance()?; self.skip_newlines()?; + } else { + break; } - } else { - break; } } - } - Value::Object(obj) - } else if matches!(self.current_token, Token::LeftBracket) { - // Array as object value: "key[2]: ..." - let array_value = self.parse_array(depth + 1)?; - let mut obj = Map::new(); - obj.insert(key, array_value); - Value::Object(obj) + Value::Object(obj) + } else if matches!(self.current_token, Token::LeftBracket) { + let array_value = self.parse_array(depth + 1)?; + let mut obj = Map::new(); + obj.insert(key, array_value); + Value::Object(obj) + } else { + Value::String(key) + } } else { - // Plain string value - Value::String(key) - } - } else { - self.parse_primitive()? - }; + self.parse_primitive()? + }; - items.push(value); + items.push(value); - if items.len() < length { if matches!(self.current_token, Token::Newline) { self.advance()?; - - if self.options.strict && matches!(self.current_token, Token::Newline) { - return Err(self.parse_error_with_context( - "Blank lines are not allowed inside list arrays in strict mode", - )); - } - self.skip_newlines()?; + } else if matches!(self.current_token, Token::Eof) { + break; } else if !matches!(self.current_token, Token::Dash) { return Err(self.parse_error_with_context(format!( "Expected newline or next list item after list item {}", - i + 1 - ))); - } - } else if matches!(self.current_token, Token::Newline) { - // After the last item, check for extra items - self.advance()?; - self.skip_newlines()?; - - let list_indent = self.options.indent.get_spaces() * (depth + 1); - let actual_indent = self.scanner.get_last_line_indent(); - // If we see another dash at the same indent, there are too many items - if actual_indent == list_indent && matches!(self.current_token, Token::Dash) - { - return Err(self.parse_error_with_context(format!( - "Array length mismatch: expected {length} items, but more items \ - found", + items.len() ))); } } } } _ => { - for i in 0..length { - if i > 0 { - if matches!(self.current_token, Token::Delimiter(_)) { - self.advance()?; + if self.options.strict { + for i in 0..length { + if i > 0 { + if matches!(self.current_token, Token::Delimiter(_)) { + self.advance()?; + } else { + return Err(self + .parse_error_with_context(format!( + "Expected delimiter, found {:?}", + self.current_token + )) + .with_suggestion(format!( + "Expected delimiter between items (item {} of {})", + i + 1, + length + ))); + } + } + + let value = if matches!(self.current_token, Token::Delimiter(_)) + || (matches!(self.current_token, Token::Eof | Token::Newline) + && i < length) + { + Value::String(String::new()) + } else if matches!(self.current_token, Token::LeftBracket) { + self.parse_array(depth + 1)? } else { - return Err(self - .parse_error_with_context(format!( + self.parse_primitive()? + }; + + items.push(value); + } + } else { + let mut i = 0; + loop { + if i == 0 && matches!(self.current_token, Token::Newline | Token::Eof) { + break; + } + + if i > 0 { + if matches!(self.current_token, Token::Delimiter(_)) { + self.advance()?; + } else { + return Err(self.parse_error_with_context(format!( "Expected delimiter, found {:?}", self.current_token - )) - .with_suggestion(format!( - "Expected delimiter between items (item {} of {})", - i + 1, - length ))); + } } - } - let value = if matches!(self.current_token, Token::Delimiter(_)) - || (matches!(self.current_token, Token::Eof | Token::Newline) && i < length) - { - Value::String(String::new()) - } else if matches!(self.current_token, Token::LeftBracket) { - self.parse_array(depth + 1)? - } else { - self.parse_primitive()? - }; + let value = if matches!(self.current_token, Token::Delimiter(_)) + || matches!(self.current_token, Token::Eof | Token::Newline) + { + Value::String(String::new()) + } else if matches!(self.current_token, Token::LeftBracket) { + self.parse_array(depth + 1)? + } else { + self.parse_primitive()? + }; + + items.push(value); + i += 1; - items.push(value); + if matches!(self.current_token, Token::Newline | Token::Eof) { + break; + } + } } } } - validation::validate_array_length(length, items.len())?; + if self.options.strict { + validation::validate_array_length(length, items.len())?; - if self.options.strict && matches!(self.current_token, Token::Delimiter(_)) { - return Err(self.parse_error_with_context(format!( - "Array length mismatch: expected {length} items, but more items found", - ))); + if matches!(self.current_token, Token::Delimiter(_)) { + return Err(self.parse_error_with_context(format!( + "Array length mismatch: expected {length} items, but more items found", + ))); + } } Ok(Value::Array(items)) @@ -1126,12 +1559,14 @@ impl<'a> Parser<'a> { .into()) } } - Token::String(s, _) => { + Token::String(s, was_quoted) => { // Tabular fields can have multiple string tokens joined with spaces + self.validate_unquoted_string(s, *was_quoted)?; let mut accumulated = s.clone(); self.advance()?; - while let Token::String(next, _) = &self.current_token { + while let Token::String(next, next_was_quoted) = &self.current_token { + self.validate_unquoted_string(next, *next_was_quoted)?; if !accumulated.is_empty() { accumulated.push(' '); } @@ -1176,7 +1611,8 @@ impl<'a> Parser<'a> { .into()) } } - Token::String(s, _) => { + Token::String(s, was_quoted) => { + self.validate_unquoted_string(s, *was_quoted)?; let val = s.clone(); self.advance()?; Ok(Value::String(val)) @@ -1192,7 +1628,8 @@ impl<'a> Parser<'a> { let (line, column) = self.scanner.current_position(); let message = message.into(); - let context = self.get_error_context(line, column); + let context = ErrorContext::from_shared_input(self.input.clone(), line, column, 2) + .unwrap_or_else(|| ErrorContext::new("")); ToonError::ParseError { line, @@ -1202,48 +1639,6 @@ impl<'a> Parser<'a> { } } - fn get_error_context(&self, line: usize, column: usize) -> ErrorContext { - let lines: Vec<&str> = self.input.lines().collect(); - - let source_line = if line > 0 && line <= lines.len() { - lines[line - 1].to_string() - } else { - String::new() - }; - - let preceding_lines: Vec = if line > 1 { - lines[line.saturating_sub(3)..line - 1] - .iter() - .map(|s| s.to_string()) - .collect() - } else { - Vec::new() - }; - - let following_lines: Vec = if line < lines.len() { - lines[line..line.saturating_add(2).min(lines.len())] - .iter() - .map(|s| s.to_string()) - .collect() - } else { - Vec::new() - }; - - let indicator = if column > 0 { - Some(format!("{:width$}^", "", width = column - 1)) - } else { - None - }; - - ErrorContext { - source_line, - preceding_lines, - following_lines, - suggestion: None, - indicator, - } - } - fn validate_indentation(&self, indent_amount: usize) -> ToonResult<()> { if !self.options.strict { return Ok(()); @@ -1260,6 +1655,19 @@ impl<'a> Parser<'a> { Ok(()) } } + + fn normalize_indent(&self, indent_amount: usize) -> usize { + if self.options.strict { + return indent_amount; + } + + let indent_size = self.options.indent.get_spaces(); + if indent_size == 0 { + indent_amount + } else { + (indent_amount / indent_size) * indent_size + } + } } #[cfg(test)] @@ -1441,10 +1849,7 @@ mod tests { #[test] fn test_round_trip_parentheses() { - use crate::{ - decode::decode_default, - encode::encode_default, - }; + use crate::{decode::decode_default, encode::encode_default}; let original = json!({ "message": "Mostly Functions (3 of 3)", diff --git a/src/decode/scanner.rs b/src/decode/scanner.rs index 2301330..0c61996 100644 --- a/src/decode/scanner.rs +++ b/src/decode/scanner.rs @@ -1,7 +1,8 @@ -use crate::types::{ - Delimiter, - ToonError, - ToonResult, +use std::sync::Arc; + +use crate::{ + constants::DEFAULT_INDENT, + types::{Delimiter, ToonError, ToonResult}, }; /// Tokens produced by the scanner during lexical analysis. @@ -25,24 +26,43 @@ pub enum Token { /// Scanner that tokenizes TOON input into a sequence of tokens. pub struct Scanner { - input: Vec, + input: Arc, position: usize, line: usize, column: usize, active_delimiter: Option, last_line_indent: usize, + cached_indent: Option, + coerce_types: bool, + indent_width: usize, + allow_tab_indent: bool, +} + +#[derive(Clone, Copy, Debug)] +struct CachedIndent { + position: usize, + indent: usize, + chars: usize, } impl Scanner { /// Create a new scanner for the given input string. pub fn new(input: &str) -> Self { + Self::from_shared_input(Arc::from(input)) + } + + pub fn from_shared_input(input: Arc) -> Self { Self { - input: input.chars().collect(), + input, position: 0, line: 1, column: 1, active_delimiter: None, last_line_indent: 0, + cached_indent: None, + coerce_types: true, + indent_width: DEFAULT_INDENT, + allow_tab_indent: false, } } @@ -51,6 +71,15 @@ impl Scanner { self.active_delimiter = delimiter; } + pub fn set_coerce_types(&mut self, coerce_types: bool) { + self.coerce_types = coerce_types; + } + + pub fn configure_indentation(&mut self, strict: bool, indent_width: usize) { + self.allow_tab_indent = !strict; + self.indent_width = indent_width.max(1); + } + /// Get the current position (line, column). pub fn current_position(&self) -> (usize, usize) { (self.line, self.column) @@ -65,57 +94,73 @@ impl Scanner { } pub fn peek(&self) -> Option { - self.input.get(self.position).copied() + let bytes = self.input.as_bytes(); + match bytes.get(self.position) { + Some(&byte) if byte.is_ascii() => Some(byte as char), + Some(_) => self.input[self.position..].chars().next(), + None => None, + } } - pub fn count_leading_spaces(&self) -> usize { - let mut idx = self.position; - let mut count = 0; - while let Some(&ch) = self.input.get(idx) { - if ch == ' ' { - count += 1; - idx += 1; - } else { - break; - } - } - count + pub fn count_leading_spaces(&mut self) -> usize { + self.peek_indent() } pub fn count_spaces_after_newline(&self) -> usize { let mut idx = self.position; - if self.input.get(idx) != Some(&'\n') { + if self.input.as_bytes().get(idx) != Some(&b'\n') { return 0; } idx += 1; - let mut count = 0; - while let Some(&ch) = self.input.get(idx) { - if ch == ' ' { - count += 1; + self.count_indent_from(idx) + } + + pub fn peek_ahead(&self, offset: usize) -> Option { + let bytes = self.input.as_bytes(); + let mut idx = self.position; + let mut remaining = offset; + + while let Some(&byte) = bytes.get(idx) { + if byte.is_ascii() { + if remaining == 0 { + return Some(byte as char); + } idx += 1; - } else { - break; + remaining -= 1; + continue; } + + return self.input[self.position..].chars().nth(offset); } - count - } - pub fn peek_ahead(&self, offset: usize) -> Option { - self.input.get(self.position + offset).copied() + None } pub fn advance(&mut self) -> Option { - if let Some(ch) = self.input.get(self.position) { - self.position += 1; - if *ch == '\n' { - self.line += 1; - self.column = 1; - } else { - self.column += 1; + let bytes = self.input.as_bytes(); + match bytes.get(self.position) { + Some(&byte) if byte.is_ascii() => { + self.position += 1; + if byte == b'\n' { + self.line += 1; + self.column = 1; + } else { + self.column += 1; + } + Some(byte as char) } - Some(*ch) - } else { - None + Some(_) => { + let ch = self.input[self.position..].chars().next()?; + self.position += ch.len_utf8(); + if ch == '\n' { + self.line += 1; + self.column = 1; + } else { + self.column += 1; + } + Some(ch) + } + None => None, } } @@ -129,29 +174,97 @@ impl Scanner { } } + fn count_indent_from(&self, mut idx: usize) -> usize { + self.count_indent_from_with_chars(&mut idx).0 + } + + fn count_indent_from_with_chars(&self, idx: &mut usize) -> (usize, usize) { + let mut count = 0; + let mut chars = 0; + let bytes = self.input.as_bytes(); + while *idx < bytes.len() { + match bytes[*idx] { + b' ' => { + count += 1; + chars += 1; + *idx += 1; + } + b'\t' if self.allow_tab_indent => { + count += self.indent_width; + chars += 1; + *idx += 1; + } + _ => break, + } + } + (count, chars) + } + + fn peek_indent(&mut self) -> usize { + if let Some(cached) = self.cached_indent { + if cached.position == self.position { + return cached.indent; + } + } + + let mut idx = self.position; + let (indent, chars) = self.count_indent_from_with_chars(&mut idx); + self.cached_indent = Some(CachedIndent { + position: self.position, + indent, + chars, + }); + indent + } + /// Scan the next token from the input. pub fn scan_token(&mut self) -> ToonResult { if self.column == 1 { - let mut count = 0; - let mut idx = self.position; - - while let Some(&ch) = self.input.get(idx) { - if ch == ' ' { - count += 1; - idx += 1; - } else { - if ch == '\t' { + let mut indent_consumed = false; + if let Some(cached) = self.cached_indent.take() { + if cached.position == self.position { + self.position += cached.chars; + self.column += cached.chars; + if !self.allow_tab_indent && matches!(self.peek(), Some('\t')) { let (line, col) = self.current_position(); return Err(ToonError::parse_error( line, - col + count, + col, "Tabs are not allowed in indentation", )); } - break; + self.last_line_indent = cached.indent; + indent_consumed = true; + } else { + self.cached_indent = Some(cached); } } - self.last_line_indent = count; + + if !indent_consumed { + let mut count = 0; + while let Some(ch) = self.peek() { + match ch { + ' ' => { + count += 1; + self.advance(); + } + '\t' => { + if !self.allow_tab_indent { + let (line, col) = self.current_position(); + return Err(ToonError::parse_error( + line, + col + count, + "Tabs are not allowed in indentation", + )); + } + count += self.indent_width; + self.advance(); + } + _ => break, + } + } + self.last_line_indent = count; + } } self.skip_whitespace(); @@ -264,38 +377,55 @@ impl Scanner { fn scan_unquoted_string(&mut self) -> ToonResult { let mut value = String::new(); + let bytes = self.input.as_bytes(); + let mut idx = self.position; + let mut start = idx; - while let Some(ch) = self.peek() { - if ch == '\n' - || ch == ' ' - || ch == ':' - || ch == '[' - || ch == ']' - || ch == '{' - || ch == '}' - { - break; + while idx < bytes.len() { + let byte = bytes[idx]; + if byte.is_ascii() { + let ch = byte as char; + if self.is_unquoted_terminator(ch) { + break; + } + idx += 1; + continue; + } + + if idx > start { + value.push_str(&self.input[start..idx]); + self.position = idx; + self.column += idx - start; } - // Active delimiters stop the string; otherwise they're part of it - if let Some(active) = self.active_delimiter { - if (active == Delimiter::Comma && ch == ',') - || (active == Delimiter::Pipe && ch == '|') - || (active == Delimiter::Tab && ch == '\t') - { + while let Some(ch) = self.peek() { + if self.is_unquoted_terminator(ch) { break; } + value.push(ch); + self.advance(); } - value.push(ch); - self.advance(); + + start = self.position; + idx = self.position; + break; + } + + if idx > start { + value.push_str(&self.input[start..idx]); + self.position = idx; + self.column += idx - start; } // Single-char delimiters kept as-is, others trimmed - let value = if value.len() == 1 && (value == "," || value == "|" || value == "\t") { - value - } else { - value.trim_end().to_string() - }; + if !(value.len() == 1 && (value == "," || value == "|" || value == "\t")) { + let trimmed_len = value.trim_end().len(); + value.truncate(trimmed_len); + } + + if !self.coerce_types { + return Ok(Token::String(value, false)); + } match value.as_str() { "null" => Ok(Token::Null), @@ -305,16 +435,30 @@ impl Scanner { } } + fn is_unquoted_terminator(&self, ch: char) -> bool { + if matches!(ch, '\n' | ' ' | ':' | '[' | ']' | '{' | '}') { + return true; + } + + if let Some(active) = self.active_delimiter { + return matches!( + (active, ch), + (Delimiter::Comma, ',') | (Delimiter::Pipe, '|') | (Delimiter::Tab, '\t') + ); + } + + false + } + pub fn get_last_line_indent(&self) -> usize { self.last_line_indent } fn scan_number_string(&mut self, negative: bool) -> ToonResult { - let mut num_str = if negative { - String::from("-") - } else { - String::new() - }; + let mut num_str = String::with_capacity(32); + if negative { + num_str.push('-'); + } while let Some(ch) = self.peek() { if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == '+' || ch == '-' @@ -330,6 +474,10 @@ impl Scanner { } fn parse_number(&self, s: &str) -> ToonResult { + if !self.coerce_types { + return Ok(Token::String(s.to_string(), false)); + } + // Number followed immediately by other chars like "0(f)" should be a string if let Some(next_ch) = self.peek() { if next_ch != ' ' @@ -352,9 +500,17 @@ impl Scanner { // Leading zeros like "05" are strings, but "0", "0.5", "-0" are numbers if s.starts_with('0') && s.len() > 1 { - let second_char = s.chars().nth(1).unwrap(); - if second_char.is_ascii_digit() { - return Ok(Token::String(s.to_string(), false)); + if let Some(second_char) = s.chars().nth(1) { + if second_char.is_ascii_digit() { + return Ok(Token::String(s.to_string(), false)); + } + } + } + if s.starts_with("-0") && s.len() > 2 { + if let Some(third_char) = s.chars().nth(2) { + if third_char.is_ascii_digit() { + return Ok(Token::String(s.to_string(), false)); + } } } @@ -372,11 +528,24 @@ impl Scanner { } /// Read the rest of the current line (until newline or EOF). - /// Returns the content with a flag indicating if it started with - /// whitespace. - pub fn read_rest_of_line_with_space_info(&mut self) -> (String, bool) { - let had_leading_space = matches!(self.peek(), Some(' ')); - self.skip_whitespace(); + /// Returns the content and any leading spaces between the current token + /// and the rest of the line. + pub fn read_rest_of_line_with_space_info(&mut self) -> (String, String) { + let (content, leading_space) = self.read_rest_of_line_with_space_count(); + let mut spaces = String::with_capacity(leading_space); + spaces.extend(std::iter::repeat_n(' ', leading_space)); + (content, spaces) + } + + /// Read the rest of the current line (until newline or EOF). + /// Returns the content and number of leading spaces between the current + /// token and the rest of the line. + pub fn read_rest_of_line_with_space_count(&mut self) -> (String, usize) { + let mut leading_space = 0usize; + while matches!(self.peek(), Some(' ')) { + leading_space += 1; + self.advance(); + } let mut result = String::new(); while let Some(ch) = self.peek() { @@ -387,12 +556,14 @@ impl Scanner { self.advance(); } - (result.trim_end().to_string(), had_leading_space) + let trimmed_len = result.trim_end().len(); + result.truncate(trimmed_len); + (result, leading_space) } /// Read the rest of the current line (until newline or EOF). pub fn read_rest_of_line(&mut self) -> String { - self.read_rest_of_line_with_space_info().0 + self.read_rest_of_line_with_space_count().0 } /// Parse a complete value string into a token. @@ -406,11 +577,11 @@ impl Scanner { if trimmed.starts_with('"') { let mut value = String::new(); let mut escaped = false; - let chars: Vec = trimmed.chars().collect(); - let mut i = 1; - while i < chars.len() { - let ch = chars[i]; + let mut chars = trimmed.char_indices(); + chars.next(); + + for (idx, ch) in chars { if escaped { match ch { 'n' => value.push('\n'), @@ -427,10 +598,16 @@ impl Scanner { } } escaped = false; - } else if ch == '\\' { + continue; + } + + if ch == '\\' { escaped = true; - } else if ch == '"' { - if i != chars.len() - 1 { + continue; + } + + if ch == '"' { + if idx + ch.len_utf8() != trimmed.len() { return Err(ToonError::parse_error( self.line, self.column, @@ -438,10 +615,9 @@ impl Scanner { )); } return Ok(Token::String(value, true)); - } else { - value.push(ch); } - i += 1; + + value.push(ch); } return Err(ToonError::parse_error( @@ -451,6 +627,10 @@ impl Scanner { )); } + if !self.coerce_types { + return Ok(Token::String(trimmed.to_string(), false)); + } + match trimmed { "true" => return Ok(Token::Bool(true)), "false" => return Ok(Token::Bool(false)), @@ -458,12 +638,20 @@ impl Scanner { _ => {} } - if trimmed.starts_with('-') || trimmed.chars().next().unwrap().is_ascii_digit() { - // Leading zeros like "05" are strings + if trimmed.starts_with('-') || trimmed.chars().next().is_some_and(|c| c.is_ascii_digit()) { + // Leading zeros like "05" or "-05" are strings if trimmed.starts_with('0') && trimmed.len() > 1 { - let second_char = trimmed.chars().nth(1).unwrap(); - if second_char.is_ascii_digit() { - return Ok(Token::String(trimmed.to_string(), false)); + if let Some(second_char) = trimmed.chars().nth(1) { + if second_char.is_ascii_digit() { + return Ok(Token::String(trimmed.to_string(), false)); + } + } + } + if trimmed.starts_with("-0") && trimmed.len() > 2 { + if let Some(third_char) = trimmed.chars().nth(2) { + if third_char.is_ascii_digit() { + return Ok(Token::String(trimmed.to_string(), false)); + } } } @@ -592,24 +780,47 @@ mod tests { #[test] fn test_read_rest_of_line_with_space_info() { let mut scanner = Scanner::new(" world"); - let (content, had_space) = scanner.read_rest_of_line_with_space_info(); + let (content, leading_space) = scanner.read_rest_of_line_with_space_info(); + assert_eq!(content, "world"); + assert_eq!(leading_space, " "); + + let mut scanner = Scanner::new("world"); + let (content, leading_space) = scanner.read_rest_of_line_with_space_info(); + assert_eq!(content, "world"); + assert!(leading_space.is_empty()); + + let mut scanner = Scanner::new("(hello)"); + let (content, leading_space) = scanner.read_rest_of_line_with_space_info(); + assert_eq!(content, "(hello)"); + assert!(leading_space.is_empty()); + + let mut scanner = Scanner::new(""); + let (content, leading_space) = scanner.read_rest_of_line_with_space_info(); + assert_eq!(content, ""); + assert!(leading_space.is_empty()); + } + + #[test] + fn test_read_rest_of_line_with_space_count() { + let mut scanner = Scanner::new(" world"); + let (content, leading_space) = scanner.read_rest_of_line_with_space_count(); assert_eq!(content, "world"); - assert!(had_space); + assert_eq!(leading_space, 1); let mut scanner = Scanner::new("world"); - let (content, had_space) = scanner.read_rest_of_line_with_space_info(); + let (content, leading_space) = scanner.read_rest_of_line_with_space_count(); assert_eq!(content, "world"); - assert!(!had_space); + assert_eq!(leading_space, 0); let mut scanner = Scanner::new("(hello)"); - let (content, had_space) = scanner.read_rest_of_line_with_space_info(); + let (content, leading_space) = scanner.read_rest_of_line_with_space_count(); assert_eq!(content, "(hello)"); - assert!(!had_space); + assert_eq!(leading_space, 0); let mut scanner = Scanner::new(""); - let (content, had_space) = scanner.read_rest_of_line_with_space_info(); + let (content, leading_space) = scanner.read_rest_of_line_with_space_count(); assert_eq!(content, ""); - assert!(!had_space); + assert_eq!(leading_space, 0); } #[test] diff --git a/src/decode/validation.rs b/src/decode/validation.rs index 40608ef..29aab52 100644 --- a/src/decode/validation.rs +++ b/src/decode/validation.rs @@ -1,17 +1,24 @@ -use crate::types::{ - ToonError, - ToonResult, -}; +use crate::types::{ToonError, ToonResult}; +use std::collections::HashSet; /// Validate that array length matches expected value. pub fn validate_array_length(expected: usize, actual: usize) -> ToonResult<()> { - // Array length mismatches should always error, regardless of strict mode if expected != actual { return Err(ToonError::length_mismatch(expected, actual)); } Ok(()) } +/// Validate that array length is non-negative. +pub fn validate_array_length_non_negative(length: i64) -> ToonResult<()> { + if length < 0 { + return Err(ToonError::InvalidInput( + "Array length must be non-negative".to_string(), + )); + } + Ok(()) +} + /// Validate field list for tabular arrays (no duplicates, non-empty names). pub fn validate_field_list(fields: &[String]) -> ToonResult<()> { if fields.is_empty() { @@ -20,24 +27,18 @@ pub fn validate_field_list(fields: &[String]) -> ToonResult<()> { )); } - // Check for duplicate field names - for i in 0..fields.len() { - for j in (i + 1)..fields.len() { - if fields[i] == fields[j] { - return Err(ToonError::InvalidInput(format!( - "Duplicate field name: '{}'", - fields[i] - ))); - } - } - } - + let mut seen = HashSet::with_capacity(fields.len()); for field in fields { if field.is_empty() { return Err(ToonError::InvalidInput( "Field name cannot be empty".to_string(), )); } + if !seen.insert(field.as_str()) { + return Err(ToonError::InvalidInput(format!( + "Duplicate field name: '{field}'" + ))); + } } Ok(()) @@ -83,6 +84,13 @@ mod tests { assert!(validate_array_length(5, 5).is_ok()); } + #[test] + fn test_validate_array_length_non_negative() { + assert!(validate_array_length_non_negative(0).is_ok()); + assert!(validate_array_length_non_negative(5).is_ok()); + assert!(validate_array_length_non_negative(-1).is_err()); + } + #[test] fn test_validate_field_list() { assert!(validate_field_list(&["id".to_string(), "name".to_string()]).is_ok()); diff --git a/src/encode/folding.rs b/src/encode/folding.rs index beec7ae..3ead315 100644 --- a/src/encode/folding.rs +++ b/src/encode/folding.rs @@ -1,46 +1,45 @@ -use crate::types::{ - is_identifier_segment, - JsonValue as Value, - KeyFoldingMode, -}; +use std::collections::HashSet; + +use crate::types::{is_identifier_segment, JsonValue as Value, KeyFoldingMode}; /// Result of chain analysis for folding. -pub struct FoldableChain { +pub struct FoldableChain<'a> { /// The folded key path (e.g., "a.b.c") pub folded_key: String, /// The leaf value at the end of the chain - pub leaf_value: Value, + pub leaf_value: &'a Value, /// Number of segments that were folded pub depth_folded: usize, } /// Check if a value is a single-key object suitable for folding. -fn is_single_key_object(value: &Value) -> Option<(&String, &Value)> { +fn is_single_key_object(value: &Value) -> Option<(&str, &Value)> { if let Value::Object(obj) = value { if obj.len() == 1 { - return obj.iter().next(); + return obj.iter().next().map(|(key, val)| (key.as_str(), val)); } } None } /// Analyze if a key-value pair can be folded into dotted notation. -pub fn analyze_foldable_chain( - key: &str, - value: &Value, +pub fn analyze_foldable_chain<'a>( + key: &'a str, + value: &'a Value, flatten_depth: usize, - existing_keys: &[&String], -) -> Option { + existing_keys: &HashSet<&str>, +) -> Option> { if !is_identifier_segment(key) { return None; } - let mut segments = vec![key.to_string()]; + let mut segments = Vec::with_capacity(4); + segments.push(key); let mut current_value = value; // Follow single-key object chain until we hit a multi-key object or leaf while let Some((next_key, next_value)) = is_single_key_object(current_value) { - if segments.len() >= flatten_depth { + if flatten_depth != usize::MAX && segments.len() >= flatten_depth { break; } @@ -48,7 +47,7 @@ pub fn analyze_foldable_chain( break; } - segments.push(next_key.clone()); + segments.push(next_key); current_value = next_value; } @@ -57,16 +56,24 @@ pub fn analyze_foldable_chain( return None; } - let folded_key = segments.join("."); + let total_len = + segments.iter().map(|segment| segment.len()).sum::() + segments.len() - 1; + let mut folded_key = String::with_capacity(total_len); + for (idx, segment) in segments.iter().enumerate() { + if idx > 0 { + folded_key.push('.'); + } + folded_key.push_str(segment); + } // Don't fold if it would collide with an existing key - if existing_keys.contains(&&folded_key) { + if existing_keys.contains(folded_key.as_str()) { return None; } Some(FoldableChain { folded_key, - leaf_value: current_value.clone(), + leaf_value: current_value, depth_folded: segments.len(), }) } @@ -81,6 +88,7 @@ pub fn should_fold(mode: KeyFoldingMode, chain: &Option) -> bool #[cfg(test)] mod tests { use serde_json::json; + use std::collections::HashSet; use super::*; @@ -99,7 +107,7 @@ mod tests { #[test] fn test_analyze_simple_chain() { let val = Value::from(json!({"b": {"c": 1}})); - let existing: Vec<&String> = vec![]; + let existing: HashSet<&str> = HashSet::new(); let result = analyze_foldable_chain("a", &val, usize::MAX, &existing); assert!(result.is_some()); @@ -107,13 +115,13 @@ mod tests { let chain = result.unwrap(); assert_eq!(chain.folded_key, "a.b.c"); assert_eq!(chain.depth_folded, 3); - assert_eq!(chain.leaf_value, Value::from(json!(1))); + assert_eq!(chain.leaf_value, &Value::from(json!(1))); } #[test] fn test_analyze_with_flatten_depth() { let val = Value::from(json!({"b": {"c": {"d": 1}}})); - let existing: Vec<&String> = vec![]; + let existing: HashSet<&str> = HashSet::new(); let result = analyze_foldable_chain("a", &val, 2, &existing); assert!(result.is_some()); @@ -126,7 +134,7 @@ mod tests { #[test] fn test_analyze_stops_at_multi_key() { let val = Value::from(json!({"b": {"c": 1, "d": 2}})); - let existing: Vec<&String> = vec![]; + let existing: HashSet<&str> = HashSet::new(); let result = analyze_foldable_chain("a", &val, usize::MAX, &existing); assert!(result.is_some()); @@ -139,7 +147,7 @@ mod tests { #[test] fn test_analyze_rejects_non_identifier() { let val = Value::from(json!({"c": 1})); - let existing: Vec<&String> = vec![]; + let existing: HashSet<&str> = HashSet::new(); let result = analyze_foldable_chain("bad-key", &val, usize::MAX, &existing); assert!(result.is_none()); @@ -149,7 +157,8 @@ mod tests { fn test_analyze_detects_collision() { let val = Value::from(json!({"b": 1})); let existing_key = String::from("a.b"); - let existing: Vec<&String> = vec![&existing_key]; + let mut existing: HashSet<&str> = HashSet::new(); + existing.insert(existing_key.as_str()); let result = analyze_foldable_chain("a", &val, usize::MAX, &existing); assert!(result.is_none()); @@ -158,7 +167,7 @@ mod tests { #[test] fn test_analyze_too_short_chain() { let val = Value::from(json!(42)); - let existing: Vec<&String> = vec![]; + let existing: HashSet<&str> = HashSet::new(); let result = analyze_foldable_chain("a", &val, usize::MAX, &existing); assert!(result.is_none()); diff --git a/src/encode/mod.rs b/src/encode/mod.rs index 0a11987..3943903 100644 --- a/src/encode/mod.rs +++ b/src/encode/mod.rs @@ -3,23 +3,14 @@ pub mod folding; pub mod primitives; pub mod writer; use indexmap::IndexMap; +use std::collections::HashSet; use crate::{ constants::MAX_DEPTH, types::{ - EncodeOptions, - IntoJsonValue, - JsonValue as Value, - KeyFoldingMode, - ToonError, - ToonResult, - }, - utils::{ - format_canonical_number, - normalize, - validation::validate_depth, - QuotingContext, + EncodeOptions, IntoJsonValue, JsonValue as Value, KeyFoldingMode, ToonError, ToonResult, }, + utils::{normalize, validation::validate_depth, QuotingContext}, }; /// Encode any serializable value to TOON format. @@ -69,11 +60,11 @@ pub fn encode(value: &T, options: &EncodeOptions) -> ToonRe let json_value = serde_json::to_value(value).map_err(|e| ToonError::SerializationError(e.to_string()))?; let json_value: Value = json_value.into(); - encode_impl(&json_value, options) + encode_impl(json_value, options) } -fn encode_impl(value: &Value, options: &EncodeOptions) -> ToonResult { - let normalized: Value = normalize(value.clone()); +fn encode_impl(value: Value, options: &EncodeOptions) -> ToonResult { + let normalized: Value = normalize(value); let mut writer = writer::Writer::new(options.clone()); match &normalized { @@ -158,7 +149,7 @@ pub fn encode_object(value: V, options: &EncodeOptions) -> Too found: value_type_name(&json_value).to_string(), }); } - encode_impl(&json_value, options) + encode_impl(json_value, options) } /// Encode a JSON array to TOON format (errors if not an array). @@ -188,7 +179,7 @@ pub fn encode_array(value: V, options: &EncodeOptions) -> Toon found: value_type_name(&json_value).to_string(), }); } - encode_impl(&json_value, options) + encode_impl(json_value, options) } fn value_type_name(value: &Value) -> &'static str { @@ -218,27 +209,46 @@ fn write_object_impl( ) -> ToonResult<()> { validate_depth(depth, MAX_DEPTH)?; - let keys: Vec<&String> = obj.keys().collect(); + let allow_folding = !disable_folding && writer.options.key_folding == KeyFoldingMode::Safe; + let (key_set, prefix_conflicts) = if allow_folding { + let mut key_set: HashSet<&str> = HashSet::with_capacity(obj.len()); + let mut prefix_conflicts: HashSet<&str> = HashSet::new(); + + for key in obj.keys() { + let key_str = key.as_str(); + key_set.insert(key_str); + if key_str.contains('.') { + let mut start = 0; + while let Some(pos) = key_str[start..].find('.') { + let end = start + pos; + if end > 0 { + prefix_conflicts.insert(&key_str[..end]); + } + start = end + 1; + } + } + } + + (key_set, prefix_conflicts) + } else { + (HashSet::new(), HashSet::new()) + }; - for (i, key) in keys.iter().enumerate() { + for (i, (key, value)) in obj.iter().enumerate() { if i > 0 { writer.write_newline()?; } - let value = &obj[*key]; + let key_str = key.as_str(); // Check if this key-value pair can be folded (v1.5 feature) // Don't fold if any sibling key is a dotted path starting with this key // (e.g., don't fold inside "data" if "data.meta.items" exists as a sibling) - let has_conflicting_sibling = keys - .iter() - .any(|k| k.starts_with(&format!("{key}.")) || (k.contains('.') && k == key)); + let has_conflicting_sibling = + allow_folding && (key_str.contains('.') || prefix_conflicts.contains(key_str)); - let folded = if !disable_folding - && writer.options.key_folding == KeyFoldingMode::Safe - && !has_conflicting_sibling - { - folding::analyze_foldable_chain(key, value, writer.options.flatten_depth, &keys) + let folded = if allow_folding && !has_conflicting_sibling { + folding::analyze_foldable_chain(key_str, value, writer.options.flatten_depth, &key_set) } else { None }; @@ -250,7 +260,7 @@ fn write_object_impl( } // Write the leaf value - match &chain.leaf_value { + match chain.leaf_value { Value::Array(arr) => { // For arrays, pass the folded key to write_array so it generates the header // correctly @@ -272,20 +282,20 @@ fn write_object_impl( writer.write_key(&chain.folded_key)?; writer.write_char(':')?; writer.write_char(' ')?; - write_primitive_value(writer, &chain.leaf_value, QuotingContext::ObjectValue)?; + write_primitive_value(writer, chain.leaf_value, QuotingContext::ObjectValue)?; } } } else { // Standard (non-folded) encoding match value { Value::Array(arr) => { - write_array(writer, Some(key), arr, depth)?; + write_array(writer, Some(key_str), arr, depth)?; } Value::Object(nested_obj) => { if depth > 0 { writer.write_indent(depth)?; } - writer.write_key(key)?; + writer.write_key(key_str)?; writer.write_char(':')?; if !nested_obj.is_empty() { writer.write_newline()?; @@ -299,7 +309,7 @@ fn write_object_impl( if depth > 0 { writer.write_indent(depth)?; } - writer.write_key(key)?; + writer.write_key(key_str)?; writer.write_char(':')?; writer.write_char(' ')?; write_primitive_value(writer, value, QuotingContext::ObjectValue)?; @@ -326,63 +336,64 @@ fn write_array( // Select format based on array content: tabular (uniform objects) > inline // primitives > nested list - if let Some(keys) = is_tabular_array(arr) { - encode_tabular_array(writer, key, arr, &keys, depth)?; - } else if is_primitive_array(arr) { - encode_primitive_array(writer, key, arr, depth)?; - } else { - encode_nested_array(writer, key, arr, depth)?; + match classify_array(arr) { + ArrayKind::Tabular(keys) => encode_tabular_array(writer, key, arr, &keys, depth)?, + ArrayKind::Primitive => encode_primitive_array(writer, key, arr, depth)?, + ArrayKind::Nested => encode_nested_array(writer, key, arr, depth)?, } Ok(()) } -/// Check if an array can be encoded as tabular format (uniform objects with -/// primitive values). -fn is_tabular_array(arr: &[Value]) -> Option> { - if arr.is_empty() { - return None; - } - - let first = arr.first()?; - if !first.is_object() { - return None; - } +enum ArrayKind<'a> { + Tabular(Vec<&'a str>), + Primitive, + Nested, +} - let first_obj = first.as_object()?; - let keys: Vec = first_obj.keys().cloned().collect(); +/// Classify array shape for encoding (tabular, primitive, nested). +fn classify_array<'a>(arr: &'a [Value]) -> ArrayKind<'a> { + let first = match arr.first() { + Some(value) => value, + None => return ArrayKind::Primitive, + }; - // First object must have only primitive values - for value in first_obj.values() { - if !is_primitive(value) { - return None; + if let Value::Object(first_obj) = first { + if !first_obj.values().all(is_primitive) { + return ArrayKind::Nested; } - } - // All remaining objects must match: same keys and all primitive values - for val in arr.iter().skip(1) { - if let Some(obj) = val.as_object() { + let keys: Vec<&str> = first_obj.keys().map(|key| key.as_str()).collect(); + + for val in arr.iter().skip(1) { + let obj = match val.as_object() { + Some(obj) => obj, + None => return ArrayKind::Nested, + }; + if obj.len() != keys.len() { - return None; + return ArrayKind::Nested; } - // Verify all keys from first object exist (order doesn't matter) + for key in &keys { - if !obj.contains_key(key) { - return None; + if !obj.contains_key(*key) { + return ArrayKind::Nested; } } - // All values must be primitives - for value in obj.values() { - if !is_primitive(value) { - return None; - } + + if !obj.values().all(is_primitive) { + return ArrayKind::Nested; } - } else { - return None; } + + return ArrayKind::Tabular(keys); } - Some(keys) + if arr.iter().all(is_primitive) { + ArrayKind::Primitive + } else { + ArrayKind::Nested + } } /// Check if a value is a primitive (not array or object). @@ -394,10 +405,6 @@ fn is_primitive(value: &Value) -> bool { } /// Check if all array elements are primitives. -fn is_primitive_array(arr: &[Value]) -> bool { - arr.iter().all(is_primitive) -} - fn encode_primitive_array( writer: &mut writer::Writer, key: Option<&str>, @@ -427,11 +434,10 @@ fn write_primitive_value( ) -> ToonResult<()> { match value { Value::Null => writer.write_str("null"), - Value::Bool(b) => writer.write_str(&b.to_string()), + Value::Bool(b) => writer.write_str(if *b { "true" } else { "false" }), Value::Number(n) => { // Format in canonical TOON form (no exponents, no trailing zeros) - let num_str = format_canonical_number(n); - writer.write_str(&num_str) + writer.write_canonical_number(n) } Value::String(s) => { if writer.needs_quoting(s, context) { @@ -450,7 +456,7 @@ fn encode_tabular_array( writer: &mut writer::Writer, key: Option<&str>, arr: &[Value], - keys: &[String], + keys: &[&str], depth: usize, ) -> ToonResult<()> { writer.write_array_header(key, arr.len(), Some(keys), depth)?; @@ -469,7 +475,7 @@ fn encode_tabular_array( } // Missing fields become null - if let Some(val) = obj.get(key) { + if let Some(val) = obj.get(*key) { write_primitive_value(writer, val, QuotingContext::ArrayValue)?; } else { writer.write_str("null")?; @@ -495,12 +501,12 @@ fn encode_tabular_array( fn encode_list_item_tabular_array( writer: &mut writer::Writer, arr: &[Value], - keys: &[String], + keys: &[&str], depth: usize, ) -> ToonResult<()> { // Write array header without key (key already written on hyphen line) writer.write_char('[')?; - writer.write_str(&arr.len().to_string())?; + writer.write_usize(arr.len())?; if writer.options.delimiter != crate::types::Delimiter::Comma { writer.write_char(writer.options.delimiter.as_char())?; @@ -534,7 +540,7 @@ fn encode_list_item_tabular_array( } // Missing fields become null - if let Some(val) = obj.get(key) { + if let Some(val) = obj.get(*key) { write_primitive_value(writer, val, QuotingContext::ArrayValue)?; } else { writer.write_str("null")?; @@ -585,14 +591,30 @@ fn encode_nested_array( // (depth +2 relative to hyphen) for their nested content // (rows for tabular, items for non-uniform) writer.write_key(first_key)?; - - if let Some(keys) = is_tabular_array(arr) { - // Tabular array: write inline with correct indentation - encode_list_item_tabular_array(writer, arr, &keys, depth + 1)?; + if arr.is_empty() { + writer.write_empty_array_with_key(None, depth + 2)?; } else { - // Non-tabular array: write with depth offset - // (items at depth +2 instead of depth +1) - write_array(writer, None, arr, depth + 2)?; + match classify_array(arr) { + ArrayKind::Tabular(keys) => { + // Tabular array: write inline with correct indentation + encode_list_item_tabular_array( + writer, + arr, + &keys, + depth + 1, + )?; + } + ArrayKind::Primitive => { + // Non-tabular array: write with depth offset + // (items at depth +2 instead of depth +1) + encode_primitive_array(writer, None, arr, depth + 2)?; + } + ArrayKind::Nested => { + // Non-tabular array: write with depth offset + // (items at depth +2 instead of depth +1) + encode_nested_array(writer, None, arr, depth + 2)?; + } + } } } Value::Object(nested_obj) => { diff --git a/src/encode/writer.rs b/src/encode/writer.rs index ba8ad96..ae11ec3 100644 --- a/src/encode/writer.rs +++ b/src/encode/writer.rs @@ -1,15 +1,8 @@ use crate::{ - types::{ - Delimiter, - EncodeOptions, - ToonResult, - }, + types::{Delimiter, EncodeOptions, Number, ToonResult}, utils::{ - string::{ - is_valid_unquoted_key, - needs_quoting, - quote_string, - }, + number::write_canonical_number_into, + string::{escape_string_into, is_valid_unquoted_key, needs_quoting}, QuotingContext, }, }; @@ -19,15 +12,20 @@ pub struct Writer { buffer: String, pub(crate) options: EncodeOptions, active_delimiters: Vec, + indent_unit: String, + indent_cache: Vec, } impl Writer { /// Create a new writer with the given options. pub fn new(options: EncodeOptions) -> Self { + let indent_unit = " ".repeat(options.indent.get_spaces()); Self { buffer: String::new(), active_delimiters: vec![options.delimiter], options, + indent_unit, + indent_cache: vec![String::new()], } } @@ -52,10 +50,13 @@ impl Writer { } pub fn write_indent(&mut self, depth: usize) -> ToonResult<()> { - let indent_string = self.options.indent.get_string(depth); - if !indent_string.is_empty() { - self.buffer.push_str(&indent_string); + if depth == 0 || self.indent_unit.is_empty() { + return Ok(()); + } + if depth >= self.indent_cache.len() { + self.extend_indent_cache(depth); } + self.buffer.push_str(&self.indent_cache[depth]); Ok(()) } @@ -77,7 +78,7 @@ impl Writer { &mut self, key: Option<&str>, length: usize, - fields: Option<&[String]>, + fields: Option<&[&str]>, depth: usize, ) -> ToonResult<()> { if let Some(k) = key { @@ -88,7 +89,7 @@ impl Writer { } self.write_char('[')?; - self.write_str(&length.to_string())?; + self.write_usize(length)?; // Only write delimiter in header if it's not comma (comma is default/implied) if self.options.delimiter != Delimiter::Comma { @@ -125,7 +126,7 @@ impl Writer { self.write_key(k)?; } self.write_char('[')?; - self.write_str("0")?; + self.write_usize(0)?; if self.options.delimiter != Delimiter::Comma { self.write_delimiter()?; @@ -145,7 +146,10 @@ impl Writer { } pub fn write_quoted_string(&mut self, s: &str) -> ToonResult<()> { - self.write_str("e_string(s)) + self.buffer.push('"'); + escape_string_into(&mut self.buffer, s); + self.buffer.push('"'); + Ok(()) } pub fn write_value(&mut self, s: &str, context: QuotingContext) -> ToonResult<()> { @@ -156,6 +160,17 @@ impl Writer { } } + pub fn write_canonical_number(&mut self, n: &Number) -> ToonResult<()> { + write_canonical_number_into(n, &mut self.buffer); + Ok(()) + } + + pub fn write_usize(&mut self, value: usize) -> ToonResult<()> { + let mut buf = itoa::Buffer::new(); + self.buffer.push_str(buf.format(value as u64)); + Ok(()) + } + /// Push a new delimiter onto the stack (for nested arrays with different /// delimiters). pub fn push_active_delimiter(&mut self, delim: Delimiter) { @@ -177,6 +192,21 @@ impl Writer { fn get_document_delimiter_char(&self) -> char { self.options.delimiter.as_char() } + + fn extend_indent_cache(&mut self, depth: usize) { + while self.indent_cache.len() <= depth { + let next = match self.indent_cache.last() { + Some(prev) => { + let mut s = String::with_capacity(prev.len() + self.indent_unit.len()); + s.push_str(prev); + s.push_str(&self.indent_unit); + s + } + None => String::new(), + }; + self.indent_cache.push(next); + } + } } #[cfg(test)] @@ -247,7 +277,7 @@ mod tests { let opts = EncodeOptions::default(); let mut writer = Writer::new(opts); - let fields = vec!["id".to_string(), "name".to_string()]; + let fields = vec!["id", "name"]; writer .write_array_header(Some("users"), 2, Some(&fields), 0) @@ -267,7 +297,7 @@ mod tests { let opts = EncodeOptions::new().with_delimiter(Delimiter::Pipe); let mut writer = Writer::new(opts); - let fields = vec!["id".to_string(), "name".to_string()]; + let fields = vec!["id", "name"]; writer .write_array_header(Some("users"), 2, Some(&fields), 0) diff --git a/src/lib.rs b/src/lib.rs index 67c0133..ac2778f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,72 +28,39 @@ pub mod constants; pub mod decode; pub mod encode; -#[cfg(feature = "cli")] +pub mod serde; +#[cfg(feature = "tui")] pub mod tui; pub mod types; pub mod utils; pub use decode::{ - decode, - decode_default, - decode_no_coerce, - decode_no_coerce_with_options, - decode_strict, + decode, decode_default, decode_no_coerce, decode_no_coerce_with_options, decode_strict, decode_strict_with_options, }; -pub use encode::{ - encode, - encode_array, - encode_default, - encode_object, -}; -pub use types::{ - DecodeOptions, - Delimiter, - EncodeOptions, - Indent, - ToonError, +pub use encode::{encode, encode_array, encode_default, encode_object}; +pub use serde::{ + from_reader, from_reader_with_options, from_slice, from_slice_with_options, from_str, + from_str_with_options, to_string, to_string_with_options, to_vec, to_writer, + to_writer_with_options, }; +pub use types::{DecodeOptions, Delimiter, EncodeOptions, Indent, ToonError}; pub use utils::{ - literal::{ - is_keyword, - is_literal_like, - }, + literal::{is_keyword, is_literal_like}, normalize, - string::{ - escape_string, - is_valid_unquoted_key, - needs_quoting, - }, + string::{escape_string, is_valid_unquoted_key, needs_quoting}, }; #[cfg(test)] mod tests { - use serde_json::{ - json, - Value, - }; + use serde_json::{json, Value}; use crate::{ constants::is_keyword, - decode::{ - decode_default, - decode_strict, - }, - encode::{ - encode, - encode_default, - }, - types::{ - Delimiter, - EncodeOptions, - }, - utils::{ - escape_string, - is_literal_like, - needs_quoting, - normalize, - }, + decode::{decode_default, decode_strict}, + encode::{encode, encode_default}, + types::{Delimiter, EncodeOptions}, + utils::{escape_string, is_literal_like, needs_quoting, normalize}, }; #[test] @@ -160,10 +127,7 @@ mod tests { assert!(needs_quoting("true", Delimiter::Comma.as_char())); } - use serde::{ - Deserialize, - Serialize, - }; + use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, PartialEq)] struct TestUser { @@ -174,10 +138,7 @@ mod tests { #[test] fn test_encode_decode_simple_struct() { - use crate::{ - decode_default, - encode_default, - }; + use crate::{decode_default, encode_default}; let user = TestUser { name: "Alice".to_string(), @@ -203,10 +164,7 @@ mod tests { #[test] fn test_encode_decode_with_array() { - use crate::{ - decode_default, - encode_default, - }; + use crate::{decode_default, encode_default}; let product = TestProduct { id: 42, @@ -221,10 +179,7 @@ mod tests { #[test] fn test_encode_decode_vec_of_structs() { - use crate::{ - decode_default, - encode_default, - }; + use crate::{decode_default, encode_default}; let users = vec![ TestUser { @@ -262,10 +217,7 @@ mod tests { #[test] fn test_encode_decode_nested_structs() { - use crate::{ - decode_default, - encode_default, - }; + use crate::{decode_default, encode_default}; let nested = Nested { outer: OuterStruct { @@ -283,10 +235,7 @@ mod tests { #[test] fn test_round_trip_list_item_tabular_v3() { - use crate::{ - decode_default, - encode_default, - }; + use crate::{decode_default, encode_default}; let original = json!({ "items": [ @@ -309,10 +258,7 @@ mod tests { #[test] fn test_round_trip_complex_list_item_tabular_v3() { - use crate::{ - decode_default, - encode_default, - }; + use crate::{decode_default, encode_default}; let original = json!({ "data": [ @@ -342,10 +288,7 @@ mod tests { #[test] fn test_round_trip_mixed_list_items_v3() { - use crate::{ - decode_default, - encode_default, - }; + use crate::{decode_default, encode_default}; let original = json!({ "entries": [ diff --git a/src/serde.rs b/src/serde.rs new file mode 100644 index 0000000..eb25db1 --- /dev/null +++ b/src/serde.rs @@ -0,0 +1,191 @@ +use std::io::{Read, Write}; + +use ::serde::{de::DeserializeOwned, Serialize}; + +use crate::types::ToonResult; +use crate::{decode, encode, DecodeOptions, EncodeOptions, ToonError}; + +/// Serialize a value to a TOON string using default options. +/// +/// # Examples +/// ``` +/// use toon_format::to_string; +/// let toon = to_string(&serde_json::json!({"a": 1}))?; +/// assert!(toon.contains("a: 1")); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn to_string(value: &T) -> ToonResult { + encode(value, &EncodeOptions::default()) +} + +/// Serialize a value to a TOON string using custom options. +/// +/// # Examples +/// ``` +/// use toon_format::{to_string_with_options, Delimiter, EncodeOptions}; +/// let opts = EncodeOptions::new().with_delimiter(Delimiter::Pipe); +/// let toon = to_string_with_options(&serde_json::json!({"items": ["a", "b"]}), &opts)?; +/// assert!(toon.contains('|')); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn to_string_with_options(value: &T, opts: &EncodeOptions) -> ToonResult { + encode(value, opts) +} + +/// Serialize a value to a UTF-8 byte vector using default options. +/// +/// # Examples +/// ``` +/// use toon_format::to_vec; +/// let bytes = to_vec(&serde_json::json!({"a": 1}))?; +/// assert!(!bytes.is_empty()); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn to_vec(value: &T) -> ToonResult> { + Ok(to_string(value)?.into_bytes()) +} + +/// Serialize a value to a writer using default options. +/// +/// # Examples +/// ``` +/// use toon_format::to_writer; +/// let mut buffer = Vec::new(); +/// to_writer(&mut buffer, &serde_json::json!({"a": 1}))?; +/// assert!(!buffer.is_empty()); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn to_writer(mut writer: W, value: &T) -> ToonResult<()> { + let encoded = to_string(value)?; + writer + .write_all(encoded.as_bytes()) + .map_err(|err| ToonError::InvalidInput(format!("Failed to write output: {err}"))) +} + +/// Serialize a value to a writer using custom options. +/// +/// # Examples +/// ``` +/// use toon_format::{to_writer_with_options, Delimiter, EncodeOptions}; +/// let opts = EncodeOptions::new().with_delimiter(Delimiter::Pipe); +/// let mut buffer = Vec::new(); +/// to_writer_with_options(&mut buffer, &serde_json::json!({"items": ["a", "b"]}), &opts)?; +/// assert!(std::str::from_utf8(&buffer).expect("valid UTF-8").contains('|')); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn to_writer_with_options( + mut writer: W, + value: &T, + opts: &EncodeOptions, +) -> ToonResult<()> { + let encoded = to_string_with_options(value, opts)?; + writer + .write_all(encoded.as_bytes()) + .map_err(|err| ToonError::InvalidInput(format!("Failed to write output: {err}"))) +} + +/// Deserialize a value from a TOON string using default options. +/// +/// # Examples +/// ``` +/// use toon_format::from_str; +/// let value: serde_json::Value = from_str("a: 1")?; +/// assert_eq!(value, serde_json::json!({"a": 1})); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn from_str(input: &str) -> ToonResult { + decode(input, &DecodeOptions::default()) +} + +/// Deserialize a value from a TOON string using custom options. +/// +/// # Examples +/// ``` +/// use toon_format::{from_str_with_options, DecodeOptions}; +/// let opts = DecodeOptions::new().with_strict(false); +/// let value: serde_json::Value = from_str_with_options("items[2]: a", &opts)?; +/// assert_eq!(value, serde_json::json!({"items": ["a"]})); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn from_str_with_options( + input: &str, + opts: &DecodeOptions, +) -> ToonResult { + decode(input, opts) +} + +/// Deserialize a value from UTF-8 bytes using default options. +/// +/// # Examples +/// ``` +/// use toon_format::from_slice; +/// let value: serde_json::Value = from_slice(b"a: 1")?; +/// assert_eq!(value, serde_json::json!({"a": 1})); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn from_slice(input: &[u8]) -> ToonResult { + let s = std::str::from_utf8(input) + .map_err(|err| ToonError::InvalidInput(format!("Input is not valid UTF-8: {err}")))?; + from_str(s) +} + +/// Deserialize a value from UTF-8 bytes using custom options. +/// +/// # Examples +/// ``` +/// use toon_format::{from_slice_with_options, DecodeOptions}; +/// let opts = DecodeOptions::new().with_strict(false); +/// let value: serde_json::Value = from_slice_with_options(b"items[2]: a", &opts)?; +/// assert_eq!(value, serde_json::json!({"items": ["a"]})); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn from_slice_with_options( + input: &[u8], + opts: &DecodeOptions, +) -> ToonResult { + let s = std::str::from_utf8(input) + .map_err(|err| ToonError::InvalidInput(format!("Input is not valid UTF-8: {err}")))?; + from_str_with_options(s, opts) +} + +/// Deserialize a value from a reader using default options. +/// +/// # Examples +/// ``` +/// use std::io::Cursor; +/// use toon_format::from_reader; +/// let mut reader = Cursor::new("a: 1"); +/// let value: serde_json::Value = from_reader(&mut reader)?; +/// assert_eq!(value, serde_json::json!({"a": 1})); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn from_reader(mut reader: R) -> ToonResult { + let mut buf = Vec::new(); + reader + .read_to_end(&mut buf) + .map_err(|err| ToonError::InvalidInput(format!("Failed to read input: {err}")))?; + from_slice(&buf) +} + +/// Deserialize a value from a reader using custom options. +/// +/// # Examples +/// ``` +/// use std::io::Cursor; +/// use toon_format::{from_reader_with_options, DecodeOptions}; +/// let opts = DecodeOptions::new().with_strict(false); +/// let mut reader = Cursor::new("items[2]: a"); +/// let value: serde_json::Value = from_reader_with_options(&mut reader, &opts)?; +/// assert_eq!(value, serde_json::json!({"items": ["a"]})); +/// # Ok::<(), toon_format::ToonError>(()) +/// ``` +pub fn from_reader_with_options( + mut reader: R, + opts: &DecodeOptions, +) -> ToonResult { + let mut buf = Vec::new(); + reader + .read_to_end(&mut buf) + .map_err(|err| ToonError::InvalidInput(format!("Failed to read input: {err}")))?; + from_slice_with_options(&buf, opts) +} diff --git a/src/tui/app.rs b/src/tui/app.rs index b5bbb6b..9c74637 100644 --- a/src/tui/app.rs +++ b/src/tui/app.rs @@ -1,43 +1,25 @@ -use std::{ - fs, - path::PathBuf, - time::Duration, -}; +use std::{fs, path::PathBuf, time::Duration}; -use anyhow::{ - Context, - Result, -}; -use chrono::Local; -use crossterm::event::{ - KeyCode, - KeyEvent, -}; +use anyhow::{Context, Result}; +use crossterm::event::{KeyCode, KeyEvent}; +#[cfg(feature = "cli-stats")] use tiktoken_rs::cl100k_base; use crate::{ - decode, - encode, + decode, encode, tui::{ components::FileBrowser, - events::{ - Event, - EventHandler, - }, - keybindings::{ - Action, - KeyBindings, - }, + events::{Event, EventHandler}, + keybindings::{Action, KeyBindings}, repl_command::ReplCommand, - state::{ - app_state::ConversionStats, - AppState, - ConversionHistory, - }, + state::{now_timestamp, AppState, ConversionHistory}, ui, }, }; +#[cfg(feature = "cli-stats")] +use crate::tui::state::ConversionStats; + /// Main TUI application managing state, events, and rendering. pub struct TuiApp<'a> { pub app_state: AppState<'a>, @@ -312,34 +294,48 @@ impl<'a> TuiApp<'a> { self.app_state.editor.set_output(toon_str.clone()); self.app_state.clear_error(); - if let Ok(bpe) = cl100k_base() { - let json_tokens = bpe.encode_with_special_tokens(input).len(); - let toon_tokens = bpe.encode_with_special_tokens(&toon_str).len(); - let json_bytes = input.len(); - let toon_bytes = toon_str.len(); - - let token_savings = - 100.0 * (1.0 - (toon_tokens as f64 / json_tokens as f64)); - let byte_savings = 100.0 * (1.0 - (toon_bytes as f64 / json_bytes as f64)); - - self.app_state.stats = Some(ConversionStats { - json_tokens, - toon_tokens, - json_bytes, - toon_bytes, - token_savings, - byte_savings, - }); - - self.app_state.file_state.add_to_history(ConversionHistory { - timestamp: Local::now(), - mode: "Encode".to_string(), - input_file: self.app_state.file_state.current_file.clone(), - output_file: None, - token_savings, - byte_savings, - }); + let json_bytes = input.len(); + let toon_bytes = toon_str.len(); + let byte_savings = if json_bytes == 0 { + 0.0 + } else { + 100.0 * (1.0 - (toon_bytes as f64 / json_bytes as f64)) + }; + + let mut history_entry = ConversionHistory { + timestamp: now_timestamp(), + mode: "Encode".to_string(), + input_file: self.app_state.file_state.current_file.clone(), + output_file: None, + token_savings: None, + byte_savings: Some(byte_savings), + }; + + self.app_state.stats = None; + + #[cfg(feature = "cli-stats")] + { + if let Ok(bpe) = cl100k_base() { + let json_tokens = bpe.encode_with_special_tokens(input).len(); + let toon_tokens = bpe.encode_with_special_tokens(&toon_str).len(); + + let token_savings = + 100.0 * (1.0 - (toon_tokens as f64 / json_tokens as f64)); + + self.app_state.stats = Some(ConversionStats { + json_tokens, + toon_tokens, + json_bytes, + toon_bytes, + token_savings, + byte_savings, + }); + + history_entry.token_savings = Some(token_savings); + } } + + self.app_state.file_state.add_to_history(history_entry); } Err(e) => { self.app_state.set_error(format!("Encode error: {e}")); @@ -360,34 +356,48 @@ impl<'a> TuiApp<'a> { self.app_state.editor.set_output(json_str.clone()); self.app_state.clear_error(); - if let Ok(bpe) = cl100k_base() { - let toon_tokens = bpe.encode_with_special_tokens(input).len(); - let json_tokens = bpe.encode_with_special_tokens(&json_str).len(); - let toon_bytes = input.len(); - let json_bytes = json_str.len(); - - let token_savings = - 100.0 * (1.0 - (toon_tokens as f64 / json_tokens as f64)); - let byte_savings = 100.0 * (1.0 - (toon_bytes as f64 / json_bytes as f64)); - - self.app_state.stats = Some(ConversionStats { - json_tokens, - toon_tokens, - json_bytes, - toon_bytes, - token_savings, - byte_savings, - }); - - self.app_state.file_state.add_to_history(ConversionHistory { - timestamp: Local::now(), - mode: "Decode".to_string(), - input_file: self.app_state.file_state.current_file.clone(), - output_file: None, - token_savings, - byte_savings, - }); + let toon_bytes = input.len(); + let json_bytes = json_str.len(); + let byte_savings = if toon_bytes == 0 { + 0.0 + } else { + 100.0 * (1.0 - (toon_bytes as f64 / json_bytes as f64)) + }; + + let mut history_entry = ConversionHistory { + timestamp: now_timestamp(), + mode: "Decode".to_string(), + input_file: self.app_state.file_state.current_file.clone(), + output_file: None, + token_savings: None, + byte_savings: Some(byte_savings), + }; + + self.app_state.stats = None; + + #[cfg(feature = "cli-stats")] + { + if let Ok(bpe) = cl100k_base() { + let toon_tokens = bpe.encode_with_special_tokens(input).len(); + let json_tokens = bpe.encode_with_special_tokens(&json_str).len(); + + let token_savings = + 100.0 * (1.0 - (toon_tokens as f64 / json_tokens as f64)); + + self.app_state.stats = Some(ConversionStats { + json_tokens, + toon_tokens, + json_bytes, + toon_bytes, + token_savings, + byte_savings, + }); + + history_entry.token_savings = Some(token_savings); + } } + + self.app_state.file_state.add_to_history(history_entry); } Err(e) => { self.app_state @@ -449,40 +459,60 @@ impl<'a> TuiApp<'a> { return Ok(()); } - #[cfg(not(target_os = "unknown"))] + #[cfg(feature = "tui-clipboard")] { - use arboard::Clipboard; - let mut clipboard = Clipboard::new()?; - clipboard.set_text(output)?; - self.app_state.set_status("Copied to clipboard".to_string()); + #[cfg(not(target_os = "unknown"))] + { + use arboard::Clipboard; + let mut clipboard = Clipboard::new()?; + clipboard.set_text(output)?; + self.app_state.set_status("Copied to clipboard".to_string()); + } + + #[cfg(target_os = "unknown")] + { + self.app_state + .set_error("Clipboard not supported on this platform".to_string()); + } } - #[cfg(target_os = "unknown")] + #[cfg(not(feature = "tui-clipboard"))] { - self.app_state - .set_error("Clipboard not supported on this platform".to_string()); + self.app_state.set_error( + "Clipboard support disabled (enable the 'tui-clipboard' feature)".to_string(), + ); } Ok(()) } fn paste_from_clipboard(&mut self) -> Result<()> { - #[cfg(not(target_os = "unknown"))] + #[cfg(feature = "tui-clipboard")] { - use arboard::Clipboard; - let mut clipboard = Clipboard::new()?; - let text = clipboard.get_text()?; - self.app_state.editor.set_input(text); - self.app_state.file_state.mark_modified(); - self.perform_conversion(); - self.app_state - .set_status("Pasted from clipboard".to_string()); + #[cfg(not(target_os = "unknown"))] + { + use arboard::Clipboard; + let mut clipboard = Clipboard::new()?; + let text = clipboard.get_text()?; + self.app_state.editor.set_input(text); + self.app_state.file_state.mark_modified(); + self.perform_conversion(); + self.app_state + .set_status("Pasted from clipboard".to_string()); + } + + #[cfg(target_os = "unknown")] + { + self.app_state + .set_error("Clipboard not supported on this platform".to_string()); + } } - #[cfg(target_os = "unknown")] + #[cfg(not(feature = "tui-clipboard"))] { - self.app_state - .set_error("Clipboard not supported on this platform".to_string()); + self.app_state.set_error( + "Clipboard support disabled (enable the 'tui-clipboard' feature)".to_string(), + ); } Ok(()) @@ -570,19 +600,29 @@ impl<'a> TuiApp<'a> { return Ok(()); } - #[cfg(not(target_os = "unknown"))] + #[cfg(feature = "tui-clipboard")] { - use arboard::Clipboard; - let mut clipboard = Clipboard::new()?; - clipboard.set_text(text)?; - self.app_state - .set_status("Copied selection to clipboard".to_string()); + #[cfg(not(target_os = "unknown"))] + { + use arboard::Clipboard; + let mut clipboard = Clipboard::new()?; + clipboard.set_text(text)?; + self.app_state + .set_status("Copied selection to clipboard".to_string()); + } + + #[cfg(target_os = "unknown")] + { + self.app_state + .set_error("Clipboard not supported on this platform".to_string()); + } } - #[cfg(target_os = "unknown")] + #[cfg(not(feature = "tui-clipboard"))] { - self.app_state - .set_error("Clipboard not supported on this platform".to_string()); + self.app_state.set_error( + "Clipboard support disabled (enable the 'tui-clipboard' feature)".to_string(), + ); } Ok(()) diff --git a/src/tui/components/diff_viewer.rs b/src/tui/components/diff_viewer.rs index 516e73d..9494fb8 100644 --- a/src/tui/components/diff_viewer.rs +++ b/src/tui/components/diff_viewer.rs @@ -1,30 +1,13 @@ //! Side-by-side diff viewer for input/output comparison. use ratatui::{ - layout::{ - Alignment, - Constraint, - Direction, - Layout, - Rect, - }, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - Paragraph, - Wrap, - }, + layout::{Alignment, Constraint, Direction, Layout, Rect}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, Frame, }; -use crate::tui::{ - state::AppState, - theme::Theme, -}; +use crate::tui::{state::AppState, theme::Theme}; pub struct DiffViewer; diff --git a/src/tui/components/editor.rs b/src/tui/components/editor.rs index 6171f21..00fb043 100644 --- a/src/tui/components/editor.rs +++ b/src/tui/components/editor.rs @@ -2,17 +2,11 @@ use ratatui::{ layout::Rect, - widgets::{ - Block, - Borders, - }, + widgets::{Block, Borders}, Frame, }; -use crate::tui::{ - state::AppState, - theme::Theme, -}; +use crate::tui::{state::AppState, theme::Theme}; pub struct EditorComponent; diff --git a/src/tui/components/file_browser.rs b/src/tui/components/file_browser.rs index 52baf55..4baf080 100644 --- a/src/tui/components/file_browser.rs +++ b/src/tui/components/file_browser.rs @@ -3,31 +3,13 @@ use std::fs; use ratatui::{ - layout::{ - Alignment, - Constraint, - Direction, - Layout, - Rect, - }, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - List, - ListItem, - Paragraph, - }, + layout::{Alignment, Constraint, Direction, Layout, Rect}, + text::{Line, Span}, + widgets::{Block, Borders, List, ListItem, Paragraph}, Frame, }; -use crate::tui::{ - state::AppState, - theme::Theme, -}; +use crate::tui::{state::AppState, theme::Theme}; /// File browser state and rendering. pub struct FileBrowser { diff --git a/src/tui/components/help_screen.rs b/src/tui/components/help_screen.rs index 2f2be40..8cfe995 100644 --- a/src/tui/components/help_screen.rs +++ b/src/tui/components/help_screen.rs @@ -1,31 +1,13 @@ //! Help screen showing keyboard shortcuts. use ratatui::{ - layout::{ - Alignment, - Constraint, - Direction, - Layout, - Rect, - }, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - List, - ListItem, - Paragraph, - }, + layout::{Alignment, Constraint, Direction, Layout, Rect}, + text::{Line, Span}, + widgets::{Block, Borders, List, ListItem, Paragraph}, Frame, }; -use crate::tui::{ - keybindings::KeyBindings, - theme::Theme, -}; +use crate::tui::{keybindings::KeyBindings, theme::Theme}; pub struct HelpScreen; diff --git a/src/tui/components/history_panel.rs b/src/tui/components/history_panel.rs index 9bdc945..273f310 100644 --- a/src/tui/components/history_panel.rs +++ b/src/tui/components/history_panel.rs @@ -1,29 +1,14 @@ //! Conversion history panel. use ratatui::{ - layout::{ - Alignment, - Constraint, - Direction, - Layout, - Rect, - }, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - List, - ListItem, - Paragraph, - }, + layout::{Alignment, Constraint, Direction, Layout, Rect}, + text::{Line, Span}, + widgets::{Block, Borders, List, ListItem, Paragraph}, Frame, }; use crate::tui::{ - state::AppState, + state::{format_timestamp, AppState}, theme::Theme, }; @@ -66,7 +51,7 @@ impl HistoryPanel { .iter() .rev() .map(|entry| { - let time_str = entry.timestamp.format("%H:%M:%S").to_string(); + let time_str = format_timestamp(&entry.timestamp); let file_str = entry .input_file .as_ref() @@ -74,18 +59,33 @@ impl HistoryPanel { .and_then(|n| n.to_str()) .unwrap_or("stdin"); - ListItem::new(Line::from(vec![ - Span::styled(format!(" {time_str} "), theme.line_number_style()), - Span::styled(format!("[{}] ", entry.mode), theme.info_style()), - Span::styled(file_str, theme.normal_style()), - Span::styled( - format!(" → {:.1}% saved", entry.token_savings), - if entry.token_savings > 0.0 { + let (savings_text, savings_style) = match entry.token_savings { + Some(token_savings) => ( + format!(" → {:.1}% saved", token_savings), + if token_savings > 0.0 { theme.success_style() } else { theme.warning_style() }, ), + None => match entry.byte_savings { + Some(byte_savings) => ( + format!(" → {:.1}% bytes", byte_savings), + if byte_savings > 0.0 { + theme.success_style() + } else { + theme.warning_style() + }, + ), + None => (" → n/a".to_string(), theme.line_number_style()), + }, + }; + + ListItem::new(Line::from(vec![ + Span::styled(format!(" {time_str} "), theme.line_number_style()), + Span::styled(format!("[{}] ", entry.mode), theme.info_style()), + Span::styled(file_str, theme.normal_style()), + Span::styled(savings_text, savings_style), ])) }) .collect(); diff --git a/src/tui/components/repl_panel.rs b/src/tui/components/repl_panel.rs index 681a566..5acb36c 100644 --- a/src/tui/components/repl_panel.rs +++ b/src/tui/components/repl_panel.rs @@ -1,36 +1,12 @@ use ratatui::{ - layout::{ - Constraint, - Direction, - Layout, - Margin, - Rect, - }, - style::{ - Color, - Modifier, - Style, - }, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - Paragraph, - Scrollbar, - ScrollbarOrientation, - ScrollbarState, - Wrap, - }, + layout::{Constraint, Direction, Layout, Margin, Rect}, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Scrollbar, ScrollbarOrientation, ScrollbarState, Wrap}, Frame, }; -use crate::tui::state::{ - AppState, - ReplLineKind, -}; +use crate::tui::state::{AppState, ReplLineKind}; pub struct ReplPanel; diff --git a/src/tui/components/settings_panel.rs b/src/tui/components/settings_panel.rs index c8d84c1..a7debad 100644 --- a/src/tui/components/settings_panel.rs +++ b/src/tui/components/settings_panel.rs @@ -1,38 +1,15 @@ //! Settings panel for configuring encode/decode options. use ratatui::{ - layout::{ - Alignment, - Constraint, - Direction, - Layout, - Rect, - }, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - List, - ListItem, - Paragraph, - }, + layout::{Alignment, Constraint, Direction, Layout, Rect}, + text::{Line, Span}, + widgets::{Block, Borders, List, ListItem, Paragraph}, Frame, }; use crate::{ - tui::{ - state::AppState, - theme::Theme, - }, - types::{ - Delimiter, - Indent, - KeyFoldingMode, - PathExpansionMode, - }, + tui::{state::AppState, theme::Theme}, + types::{Delimiter, Indent, KeyFoldingMode, PathExpansionMode}, }; pub struct SettingsPanel; diff --git a/src/tui/components/stats_bar.rs b/src/tui/components/stats_bar.rs index 3691d9b..4afb0e8 100644 --- a/src/tui/components/stats_bar.rs +++ b/src/tui/components/stats_bar.rs @@ -2,22 +2,12 @@ use ratatui::{ layout::Rect, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - Paragraph, - }, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph}, Frame, }; -use crate::tui::{ - state::AppState, - theme::Theme, -}; +use crate::tui::{state::AppState, theme::Theme}; pub struct StatsBar; diff --git a/src/tui/components/status_bar.rs b/src/tui/components/status_bar.rs index 8d1fb00..999f650 100644 --- a/src/tui/components/status_bar.rs +++ b/src/tui/components/status_bar.rs @@ -1,29 +1,13 @@ //! Status bar showing mode, file, and key commands. use ratatui::{ - layout::{ - Alignment, - Constraint, - Direction, - Layout, - Rect, - }, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - Paragraph, - }, + layout::{Alignment, Constraint, Direction, Layout, Rect}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph}, Frame, }; -use crate::tui::{ - state::AppState, - theme::Theme, -}; +use crate::tui::{state::AppState, theme::Theme}; pub struct StatusBar; diff --git a/src/tui/events.rs b/src/tui/events.rs index 64a6e22..0d22c91 100644 --- a/src/tui/events.rs +++ b/src/tui/events.rs @@ -2,11 +2,7 @@ use std::time::Duration; -use crossterm::event::{ - self, - Event as CrosstermEvent, - KeyEvent, -}; +use crossterm::event::{self, Event as CrosstermEvent, KeyEvent}; /// TUI events. pub enum Event { diff --git a/src/tui/keybindings.rs b/src/tui/keybindings.rs index e82ccb1..f755e02 100644 --- a/src/tui/keybindings.rs +++ b/src/tui/keybindings.rs @@ -1,10 +1,6 @@ //! Keyboard shortcuts and action mapping. -use crossterm::event::{ - KeyCode, - KeyEvent, - KeyModifiers, -}; +use crossterm::event::{KeyCode, KeyEvent, KeyModifiers}; /// Actions that can be triggered by keyboard shortcuts. #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/tui/mod.rs b/src/tui/mod.rs index faa352b..a7037ed 100644 --- a/src/tui/mod.rs +++ b/src/tui/mod.rs @@ -18,17 +18,9 @@ use anyhow::Result; pub use app::TuiApp; use crossterm::{ execute, - terminal::{ - disable_raw_mode, - enable_raw_mode, - EnterAlternateScreen, - LeaveAlternateScreen, - }, -}; -use ratatui::{ - backend::CrosstermBackend, - Terminal, + terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen}, }; +use ratatui::{backend::CrosstermBackend, Terminal}; /// Initialize and run the TUI application. /// diff --git a/src/tui/repl_command.rs b/src/tui/repl_command.rs index 5191f2f..33e15ee 100644 --- a/src/tui/repl_command.rs +++ b/src/tui/repl_command.rs @@ -1,9 +1,6 @@ //! REPL command parser with inline data support -use anyhow::{ - bail, - Result, -}; +use anyhow::{bail, Result}; /// Parsed REPL command with inline data #[derive(Debug, Clone)] diff --git a/src/tui/state/app_state.rs b/src/tui/state/app_state.rs index 7c70b32..1ef8408 100644 --- a/src/tui/state/app_state.rs +++ b/src/tui/state/app_state.rs @@ -1,20 +1,9 @@ //! Main application state. -use super::{ - EditorState, - FileState, - ReplState, -}; +use super::{EditorState, FileState, ReplState}; use crate::{ tui::theme::Theme, - types::{ - DecodeOptions, - Delimiter, - EncodeOptions, - Indent, - KeyFoldingMode, - PathExpansionMode, - }, + types::{DecodeOptions, Delimiter, EncodeOptions, Indent, KeyFoldingMode, PathExpansionMode}, }; /// Conversion mode (encode/decode). @@ -188,96 +177,73 @@ impl<'a> AppState<'a> { } pub fn cycle_delimiter(&mut self) { - self.encode_options = - self.encode_options - .clone() - .with_delimiter(match self.encode_options.delimiter { - Delimiter::Comma => Delimiter::Tab, - Delimiter::Tab => Delimiter::Pipe, - Delimiter::Pipe => Delimiter::Comma, - }); + self.encode_options.delimiter = match self.encode_options.delimiter { + Delimiter::Comma => Delimiter::Tab, + Delimiter::Tab => Delimiter::Pipe, + Delimiter::Pipe => Delimiter::Comma, + }; } pub fn increase_indent(&mut self) { let Indent::Spaces(current) = self.encode_options.indent; if current < 8 { - self.encode_options = self - .encode_options - .clone() - .with_indent(Indent::Spaces(current + 1)); + self.encode_options.indent = Indent::Spaces(current + 1); } } pub fn decrease_indent(&mut self) { let Indent::Spaces(current) = self.encode_options.indent; if current > 1 { - self.encode_options = self - .encode_options - .clone() - .with_indent(Indent::Spaces(current - 1)); + self.encode_options.indent = Indent::Spaces(current - 1); } } pub fn toggle_fold_keys(&mut self) { - self.encode_options = - self.encode_options - .clone() - .with_key_folding(match self.encode_options.key_folding { - KeyFoldingMode::Off => KeyFoldingMode::Safe, - KeyFoldingMode::Safe => KeyFoldingMode::Off, - }); + self.encode_options.key_folding = match self.encode_options.key_folding { + KeyFoldingMode::Off => KeyFoldingMode::Safe, + KeyFoldingMode::Safe => KeyFoldingMode::Off, + }; } pub fn increase_flatten_depth(&mut self) { if self.encode_options.flatten_depth == usize::MAX { - self.encode_options = self.encode_options.clone().with_flatten_depth(2); + self.encode_options.flatten_depth = 2; } else if self.encode_options.flatten_depth < 10 { - self.encode_options = self - .encode_options - .clone() - .with_flatten_depth(self.encode_options.flatten_depth + 1); + self.encode_options.flatten_depth += 1; } } pub fn decrease_flatten_depth(&mut self) { if self.encode_options.flatten_depth == 2 { - self.encode_options = self.encode_options.clone().with_flatten_depth(usize::MAX); + self.encode_options.flatten_depth = usize::MAX; } else if self.encode_options.flatten_depth > 2 && self.encode_options.flatten_depth != usize::MAX { - self.encode_options = self - .encode_options - .clone() - .with_flatten_depth(self.encode_options.flatten_depth - 1); + self.encode_options.flatten_depth -= 1; } } pub fn toggle_flatten_depth(&mut self) { if self.encode_options.flatten_depth == usize::MAX { - self.encode_options = self.encode_options.clone().with_flatten_depth(2); + self.encode_options.flatten_depth = 2; } else { - self.encode_options = self.encode_options.clone().with_flatten_depth(usize::MAX); + self.encode_options.flatten_depth = usize::MAX; } } pub fn toggle_expand_paths(&mut self) { - self.decode_options = - self.decode_options - .clone() - .with_expand_paths(match self.decode_options.expand_paths { - PathExpansionMode::Off => PathExpansionMode::Safe, - PathExpansionMode::Safe => PathExpansionMode::Off, - }); + self.decode_options.expand_paths = match self.decode_options.expand_paths { + PathExpansionMode::Off => PathExpansionMode::Safe, + PathExpansionMode::Safe => PathExpansionMode::Off, + }; } pub fn toggle_strict(&mut self) { - let strict = !self.decode_options.strict; - self.decode_options = self.decode_options.clone().with_strict(strict); + self.decode_options.strict = !self.decode_options.strict; } pub fn toggle_coerce_types(&mut self) { - let coerce = !self.decode_options.coerce_types; - self.decode_options = self.decode_options.clone().with_coerce_types(coerce); + self.decode_options.coerce_types = !self.decode_options.coerce_types; } } diff --git a/src/tui/state/file_state.rs b/src/tui/state/file_state.rs index 436e971..bdff523 100644 --- a/src/tui/state/file_state.rs +++ b/src/tui/state/file_state.rs @@ -2,10 +2,42 @@ use std::path::PathBuf; -use chrono::{ - DateTime, - Local, -}; +#[cfg(feature = "tui-time")] +use chrono::{DateTime, Local}; + +#[cfg(feature = "tui-time")] +pub type Timestamp = DateTime; + +#[cfg(not(feature = "tui-time"))] +pub type Timestamp = (); + +pub fn now_timestamp() -> Option { + #[cfg(feature = "tui-time")] + { + Some(Local::now()) + } + + #[cfg(not(feature = "tui-time"))] + { + None + } +} + +pub fn format_timestamp(timestamp: &Option) -> String { + #[cfg(feature = "tui-time")] + { + timestamp + .as_ref() + .map(|ts| ts.format("%H:%M:%S").to_string()) + .unwrap_or_else(|| "--:--:--".to_string()) + } + + #[cfg(not(feature = "tui-time"))] + { + let _ = timestamp; + "--:--:--".to_string() + } +} /// A file or directory entry. #[derive(Debug, Clone)] @@ -13,7 +45,7 @@ pub struct FileEntry { pub path: PathBuf, pub is_dir: bool, pub size: u64, - pub modified: Option>, + pub modified: Option, } impl FileEntry { @@ -37,12 +69,12 @@ impl FileEntry { /// Record of a conversion operation. #[derive(Debug, Clone)] pub struct ConversionHistory { - pub timestamp: DateTime, + pub timestamp: Option, pub mode: String, pub input_file: Option, pub output_file: Option, - pub token_savings: f64, - pub byte_savings: f64, + pub token_savings: Option, + pub byte_savings: Option, } /// File browser and conversion history state. diff --git a/src/tui/state/mod.rs b/src/tui/state/mod.rs index 9943270..a3a5d52 100644 --- a/src/tui/state/mod.rs +++ b/src/tui/state/mod.rs @@ -5,21 +5,7 @@ pub mod editor_state; pub mod file_state; pub mod repl_state; -pub use app_state::{ - AppState, - ConversionStats, - Mode, -}; -pub use editor_state::{ - EditorMode, - EditorState, -}; -pub use file_state::{ - ConversionHistory, - FileState, -}; -pub use repl_state::{ - ReplLine, - ReplLineKind, - ReplState, -}; +pub use app_state::{AppState, ConversionStats, Mode}; +pub use editor_state::{EditorMode, EditorState}; +pub use file_state::{format_timestamp, now_timestamp, ConversionHistory, FileState}; +pub use repl_state::{ReplLine, ReplLineKind, ReplState}; diff --git a/src/tui/theme.rs b/src/tui/theme.rs index 8d436d0..3cf5e56 100644 --- a/src/tui/theme.rs +++ b/src/tui/theme.rs @@ -1,10 +1,6 @@ //! Color themes for the TUI. -use ratatui::style::{ - Color, - Modifier, - Style, -}; +use ratatui::style::{Color, Modifier, Style}; /// Available color themes. #[derive(Debug, Clone, Copy, PartialEq, Default)] diff --git a/src/tui/ui.rs b/src/tui/ui.rs index 2910c37..22e3ae2 100644 --- a/src/tui/ui.rs +++ b/src/tui/ui.rs @@ -1,42 +1,19 @@ use ratatui::{ - layout::{ - Alignment, - Constraint, - Direction, - Layout, - Rect, - }, - text::{ - Line, - Span, - }, - widgets::{ - Block, - Borders, - Paragraph, - }, + layout::{Alignment, Constraint, Direction, Layout, Rect}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph}, Frame, }; use super::{ components::{ - DiffViewer, - EditorComponent, - FileBrowser, - HelpScreen, - HistoryPanel, - ReplPanel, - SettingsPanel, - StatsBar, - StatusBar, + DiffViewer, EditorComponent, FileBrowser, HelpScreen, HistoryPanel, ReplPanel, + SettingsPanel, StatsBar, StatusBar, }, state::AppState, theme::Theme, }; -use crate::types::{ - KeyFoldingMode, - PathExpansionMode, -}; +use crate::types::{KeyFoldingMode, PathExpansionMode}; /// Main render function - orchestrates all UI components. pub fn render(f: &mut Frame, app: &mut AppState, file_browser: &mut FileBrowser) { diff --git a/src/types/delimiter.rs b/src/types/delimiter.rs index e554e6a..cb387a5 100644 --- a/src/types/delimiter.rs +++ b/src/types/delimiter.rs @@ -1,9 +1,6 @@ use std::fmt; -use serde::{ - Deserialize, - Serialize, -}; +use serde::{Deserialize, Serialize}; /// Delimiter character used to separate array elements. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] diff --git a/src/types/errors.rs b/src/types/errors.rs index ad85f33..5c3e70c 100644 --- a/src/types/errors.rs +++ b/src/types/errors.rs @@ -1,3 +1,4 @@ +use std::sync::Arc; use thiserror::Error; /// Result type alias for TOON operations. @@ -50,31 +51,83 @@ pub enum ToonError { /// Contextual information for error reporting, including source location /// and suggestions. +#[derive(Debug, Clone, PartialEq, Eq)] +enum ErrorContextSource { + Inline { + source_line: String, + preceding_lines: Vec, + following_lines: Vec, + indicator: Option, + }, + Lazy { + input: Arc, + line: usize, + column: usize, + context_lines: usize, + }, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct ErrorContext { - pub source_line: String, - pub preceding_lines: Vec, - pub following_lines: Vec, + source: ErrorContextSource, pub suggestion: Option, - pub indicator: Option, } impl std::fmt::Display for ErrorContext { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "\nContext:")?; - for line in &self.preceding_lines { - writeln!(f, " {line}")?; - } + match &self.source { + ErrorContextSource::Inline { + source_line, + preceding_lines, + following_lines, + indicator, + } => { + for line in preceding_lines { + writeln!(f, " {line}")?; + } - writeln!(f, "> {}", self.source_line)?; + writeln!(f, "> {source_line}")?; - if let Some(indicator) = &self.indicator { - writeln!(f, " {indicator}")?; - } + if let Some(indicator) = indicator { + writeln!(f, " {indicator}")?; + } + + for line in following_lines { + writeln!(f, " {line}")?; + } + } + ErrorContextSource::Lazy { + input, + line, + column, + context_lines, + } => { + let lines: Vec<&str> = input.lines().collect(); + if *line == 0 || *line > lines.len() { + return Ok(()); + } - for line in &self.following_lines { - writeln!(f, " {line}")?; + let line_idx = line - 1; + let start_line = line_idx.saturating_sub(*context_lines); + let end_line = (line_idx + context_lines + 1).min(lines.len()); + + for line in &lines[start_line..line_idx] { + writeln!(f, " {line}")?; + } + + writeln!(f, "> {}", lines[line_idx])?; + + if *column > 0 { + let indicator = " ".repeat(column.saturating_sub(1)); + writeln!(f, " {indicator}^")?; + } + + for line in &lines[(line_idx + 1)..end_line] { + writeln!(f, " {line}")?; + } + } } if let Some(suggestion) = &self.suggestion { @@ -87,27 +140,71 @@ impl std::fmt::Display for ErrorContext { impl std::error::Error for ErrorContext {} +fn extract_lines_to_strings( + input: &str, + line: usize, + context_lines: usize, +) -> (String, Vec, Vec) { + let lines: Vec<&str> = input.lines().collect(); + + if line == 0 || line > lines.len() { + return (String::new(), Vec::new(), Vec::new()); + } + + let line_idx = line - 1; + let source_line = lines.get(line_idx).unwrap_or(&"").to_string(); + + let start_line = line_idx.saturating_sub(context_lines); + let end_line = (line_idx + context_lines + 1).min(lines.len()); + + let preceding_lines = lines[start_line..line_idx] + .iter() + .map(|s| s.to_string()) + .collect(); + + let following_lines = lines[(line_idx + 1)..end_line] + .iter() + .map(|s| s.to_string()) + .collect(); + + (source_line, preceding_lines, following_lines) +} + impl ErrorContext { /// Create a new error context with a source line. pub fn new(source_line: impl Into) -> Self { Self { - source_line: source_line.into(), - preceding_lines: Vec::new(), - following_lines: Vec::new(), + source: ErrorContextSource::Inline { + source_line: source_line.into(), + preceding_lines: Vec::new(), + following_lines: Vec::new(), + indicator: None, + }, suggestion: None, - indicator: None, } } /// Add preceding context lines. pub fn with_preceding_lines(mut self, lines: Vec) -> Self { - self.preceding_lines = lines; + self.ensure_inline(); + if let ErrorContextSource::Inline { + preceding_lines, .. + } = &mut self.source + { + *preceding_lines = lines; + } self } /// Add following context lines. pub fn with_following_lines(mut self, lines: Vec) -> Self { - self.following_lines = lines; + self.ensure_inline(); + if let ErrorContextSource::Inline { + following_lines, .. + } = &mut self.source + { + *following_lines = lines; + } self } @@ -119,49 +216,72 @@ impl ErrorContext { /// Add a column indicator (caret) pointing to the error position. pub fn with_indicator(mut self, column: usize) -> Self { - let indicator = format!("{}^", " ".repeat(column)); - self.indicator = Some(indicator); + self.ensure_inline(); + if let ErrorContextSource::Inline { indicator, .. } = &mut self.source { + let indicator_value = format!("{}^", " ".repeat(column.saturating_sub(1))); + *indicator = Some(indicator_value); + } self } - /// Create error context from input string with automatic context - /// extraction. - pub fn from_input( - input: &str, + fn ensure_inline(&mut self) { + if let ErrorContextSource::Lazy { + input, + line, + column, + context_lines, + } = &self.source + { + let (source_line, preceding_lines, following_lines) = + extract_lines_to_strings(input, *line, *context_lines); + let indicator = if *column > 0 { + Some(format!("{}^", " ".repeat(column.saturating_sub(1)))) + } else { + None + }; + + self.source = ErrorContextSource::Inline { + source_line, + preceding_lines, + following_lines, + indicator, + }; + } + } + + /// Create error context from a shared input buffer with lazy extraction. + pub fn from_shared_input( + input: Arc, line: usize, column: usize, context_lines: usize, ) -> Option { - let lines: Vec<&str> = input.lines().collect(); - - if line == 0 || line > lines.len() { + let line_count = input.lines().count(); + if line == 0 || line > line_count { return None; } - let line_idx = line - 1; - let source_line = lines.get(line_idx)?.to_string(); - - let start_line = line_idx.saturating_sub(context_lines); - let end_line = (line_idx + context_lines + 1).min(lines.len()); - - let preceding_lines = lines[start_line..line_idx] - .iter() - .map(|s| s.to_string()) - .collect(); - - let following_lines = lines[(line_idx + 1)..end_line] - .iter() - .map(|s| s.to_string()) - .collect(); - Some(Self { - source_line, - preceding_lines, - following_lines, + source: ErrorContextSource::Lazy { + input, + line, + column, + context_lines, + }, suggestion: None, - indicator: Some(format!("{}^", " ".repeat(column.saturating_sub(1)))), }) } + + /// Create error context from input string with automatic context + /// extraction. + pub fn from_input( + input: &str, + line: usize, + column: usize, + context_lines: usize, + ) -> Option { + Self::from_shared_input(Arc::from(input), line, column, context_lines) + } } impl ToonError { @@ -285,9 +405,18 @@ mod tests { .with_suggestion("Try using quotes") .with_indicator(5); - assert_eq!(ctx.source_line, "test line"); + match &ctx.source { + ErrorContextSource::Inline { + source_line, + indicator, + .. + } => { + assert_eq!(source_line, "test line"); + assert!(indicator.is_some()); + } + ErrorContextSource::Lazy { .. } => panic!("Expected inline context"), + } assert_eq!(ctx.suggestion, Some("Try using quotes".to_string())); - assert!(ctx.indicator.is_some()); } #[test] @@ -297,9 +426,24 @@ mod tests { assert!(ctx.is_some()); let ctx = ctx.unwrap(); - assert_eq!(ctx.source_line, "line 2 with error"); - assert_eq!(ctx.preceding_lines, vec!["line 1"]); - assert_eq!(ctx.following_lines, vec!["line 3"]); + match &ctx.source { + ErrorContextSource::Lazy { + line, + column, + context_lines, + .. + } => { + assert_eq!(*line, 2); + assert_eq!(*column, 6); + assert_eq!(*context_lines, 1); + } + ErrorContextSource::Inline { .. } => panic!("Expected lazy context"), + } + + let rendered = format!("{ctx}"); + assert!(rendered.contains("line 2 with error")); + assert!(rendered.contains("line 1")); + assert!(rendered.contains("line 3")); } #[test] diff --git a/src/types/folding.rs b/src/types/folding.rs index 2c7d272..04c4194 100644 --- a/src/types/folding.rs +++ b/src/types/folding.rs @@ -19,24 +19,21 @@ pub enum PathExpansionMode { /// Check if a key segment is a valid IdentifierSegment (stricter than unquoted /// keys). pub fn is_identifier_segment(s: &str) -> bool { - if s.is_empty() { + let bytes = s.as_bytes(); + if bytes.is_empty() { return false; } - let mut chars = s.chars(); - // First character must be letter or underscore - let first = match chars.next() { - Some(c) => c, - None => return false, - }; - - if !first.is_alphabetic() && first != '_' { + let first = bytes[0]; + if !first.is_ascii_alphabetic() && first != b'_' { return false; } // Remaining characters: letters, digits, or underscore (NO dots) - chars.all(|c| c.is_alphanumeric() || c == '_') + bytes[1..] + .iter() + .all(|b| b.is_ascii_alphanumeric() || *b == b'_') } #[cfg(test)] diff --git a/src/types/mod.rs b/src/types/mod.rs index 42b524f..ebfaf68 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -5,23 +5,7 @@ mod options; mod value; pub use delimiter::Delimiter; -pub use errors::{ - ErrorContext, - ToonError, - ToonResult, -}; -pub use folding::{ - is_identifier_segment, - KeyFoldingMode, - PathExpansionMode, -}; -pub use options::{ - DecodeOptions, - EncodeOptions, - Indent, -}; -pub use value::{ - IntoJsonValue, - JsonValue, - Number, -}; +pub use errors::{ErrorContext, ToonError, ToonResult}; +pub use folding::{is_identifier_segment, KeyFoldingMode, PathExpansionMode}; +pub use options::{DecodeOptions, EncodeOptions, Indent}; +pub use value::{IntoJsonValue, JsonValue, Number}; diff --git a/src/types/options.rs b/src/types/options.rs index 5e1ecb3..ec4562c 100644 --- a/src/types/options.rs +++ b/src/types/options.rs @@ -1,10 +1,6 @@ use crate::{ constants::DEFAULT_INDENT, - types::{ - Delimiter, - KeyFoldingMode, - PathExpansionMode, - }, + types::{Delimiter, KeyFoldingMode, PathExpansionMode}, }; #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/src/types/value.rs b/src/types/value.rs index 15885cb..3e55d10 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -1,9 +1,6 @@ use std::{ fmt, - ops::{ - Index, - IndexMut, - }, + ops::{Index, IndexMut}, }; use indexmap::IndexMap; @@ -327,6 +324,20 @@ impl JsonValue { } } + pub fn get(&self, key: &str) -> Option<&JsonValue> { + match self { + JsonValue::Object(obj) => obj.get(key), + _ => None, + } + } + + pub fn get_index(&self, index: usize) -> Option<&JsonValue> { + match self { + JsonValue::Array(arr) => arr.get(index), + _ => None, + } + } + /// Takes the value, leaving Null in its place. pub fn take(&mut self) -> JsonValue { std::mem::replace(self, JsonValue::Null) @@ -380,8 +391,16 @@ impl Index for JsonValue { fn index(&self, index: usize) -> &Self::Output { match self { - JsonValue::Array(arr) => &arr[index], - _ => panic!("cannot index into non-array value with usize"), + JsonValue::Array(arr) => arr.get(index).unwrap_or_else(|| { + panic!( + "index {index} out of bounds for array of length {}", + arr.len() + ) + }), + _ => panic!( + "cannot index into non-array value of type {}", + self.type_name() + ), } } } @@ -389,8 +408,16 @@ impl Index for JsonValue { impl IndexMut for JsonValue { fn index_mut(&mut self, index: usize) -> &mut Self::Output { match self { - JsonValue::Array(arr) => &mut arr[index], - _ => panic!("cannot index into non-array value with usize"), + JsonValue::Array(arr) => { + let len = arr.len(); + arr.get_mut(index).unwrap_or_else(|| { + panic!("index {index} out of bounds for array of length {len}") + }) + } + _ => panic!( + "cannot index into non-array value of type {}", + self.type_name() + ), } } } @@ -400,10 +427,13 @@ impl Index<&str> for JsonValue { fn index(&self, key: &str) -> &Self::Output { match self { - JsonValue::Object(obj) => obj - .get(key) - .unwrap_or_else(|| panic!("key '{key}' not found in object")), - _ => panic!("cannot index into non-object value with &str"), + JsonValue::Object(obj) => obj.get(key).unwrap_or_else(|| { + panic!("key '{key}' not found in object with {} entries", obj.len()) + }), + _ => panic!( + "cannot index into non-object value of type {}", + self.type_name() + ), } } } @@ -411,10 +441,15 @@ impl Index<&str> for JsonValue { impl IndexMut<&str> for JsonValue { fn index_mut(&mut self, key: &str) -> &mut Self::Output { match self { - JsonValue::Object(obj) => obj - .get_mut(key) - .unwrap_or_else(|| panic!("key '{key}' not found in object")), - _ => panic!("cannot index into non-object value with &str"), + JsonValue::Object(obj) => { + let len = obj.len(); + obj.get_mut(key) + .unwrap_or_else(|| panic!("key '{key}' not found in object with {len} entries")) + } + _ => panic!( + "cannot index into non-object value of type {}", + self.type_name() + ), } } } diff --git a/src/utils/literal.rs b/src/utils/literal.rs index 3f8aeea..7cf2c7c 100644 --- a/src/utils/literal.rs +++ b/src/utils/literal.rs @@ -17,34 +17,32 @@ pub fn is_structural_char(ch: char) -> bool { /// Check if a string looks like a number (starts with digit, no leading zeros). pub fn is_numeric_like(s: &str) -> bool { - if s.is_empty() { + let bytes = s.as_bytes(); + if bytes.is_empty() { return false; } - let chars: Vec = s.chars().collect(); let mut i = 0; - - if chars[i] == '-' { - i += 1; + if bytes[0] == b'-' { + i = 1; } - if i >= chars.len() { + if i >= bytes.len() { return false; } - if !chars[i].is_ascii_digit() { + let first = bytes[i]; + if !first.is_ascii_digit() { return false; } - if chars[i] == '0' && i + 1 < chars.len() && chars[i + 1].is_ascii_digit() { + if first == b'0' && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit() { return false; } - let has_valid_chars = chars[i..].iter().all(|c| { - c.is_ascii_digit() || *c == '.' || *c == 'e' || *c == 'E' || *c == '+' || *c == '-' - }); - - has_valid_chars + bytes[i..].iter().all(|b| { + b.is_ascii_digit() || *b == b'.' || *b == b'e' || *b == b'E' || *b == b'+' || *b == b'-' + }) } #[cfg(test)] diff --git a/src/utils/mod.rs b/src/utils/mod.rs index cf355c6..d7d3e54 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -4,25 +4,17 @@ pub mod string; pub mod validation; use indexmap::IndexMap; -pub use literal::{ - is_keyword, - is_literal_like, - is_numeric_like, - is_structural_char, -}; -pub use number::format_canonical_number; +pub use literal::{is_keyword, is_literal_like, is_numeric_like, is_structural_char}; +pub use number::{format_canonical_number, write_canonical_number_into}; pub use string::{ - escape_string, - is_valid_unquoted_key, - needs_quoting, - quote_string, + escape_string, escape_string_into, is_valid_unquoted_key, needs_quoting, quote_string, unescape_string, }; -use crate::types::{ - JsonValue as Value, - Number, -}; +#[cfg(feature = "parallel")] +use rayon::prelude::*; + +use crate::types::{JsonValue as Value, Number}; /// Context for determining when quoting is needed. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -31,6 +23,9 @@ pub enum QuotingContext { ArrayValue, } +#[cfg(feature = "parallel")] +const PARALLEL_THRESHOLD: usize = 256; + /// Normalize a JSON value (converts NaN/Infinity to null, -0 to 0). pub fn normalize(value: Value) -> Value { match value { @@ -51,11 +46,31 @@ pub fn normalize(value: Value) -> Value { } } Value::Object(obj) => { + #[cfg(feature = "parallel")] + { + if obj.len() >= PARALLEL_THRESHOLD { + let entries: Vec<(String, Value)> = obj.into_iter().collect(); + let normalized_entries: Vec<(String, Value)> = entries + .into_par_iter() + .map(|(k, v)| (k, normalize(v))) + .collect(); + return Value::Object(IndexMap::from_iter(normalized_entries)); + } + } + let normalized: IndexMap = obj.into_iter().map(|(k, v)| (k, normalize(v))).collect(); Value::Object(normalized) } Value::Array(arr) => { + #[cfg(feature = "parallel")] + { + if arr.len() >= PARALLEL_THRESHOLD { + let normalized: Vec = arr.into_par_iter().map(normalize).collect(); + return Value::Array(normalized); + } + } + let normalized: Vec = arr.into_iter().map(normalize).collect(); Value::Array(normalized) } diff --git a/src/utils/number.rs b/src/utils/number.rs index 0a62f78..bb16139 100644 --- a/src/utils/number.rs +++ b/src/utils/number.rs @@ -1,41 +1,60 @@ +use itoa::Buffer as ItoaBuffer; +use ryu::Buffer as RyuBuffer; + use crate::types::Number; /// Format a number in TOON canonical form (no exponents, no trailing zeros). pub fn format_canonical_number(n: &Number) -> String { - if let Some(i) = n.as_i64() { - return i.to_string(); - } + let mut out = String::new(); + write_canonical_number_into(n, &mut out); + out +} - if let Some(u) = n.as_u64() { - return u.to_string(); +pub fn write_canonical_number_into(n: &Number, out: &mut String) { + match n { + Number::PosInt(u) => write_u64(out, *u), + Number::NegInt(i) => write_i64(out, *i), + Number::Float(f) => write_f64_canonical_into(*f, out), } +} - if let Some(f) = n.as_f64() { - return format_f64_canonical(f); - } +fn write_u64(out: &mut String, value: u64) { + let mut buf = ItoaBuffer::new(); + out.push_str(buf.format(value)); +} - n.to_string() +fn write_i64(out: &mut String, value: i64) { + let mut buf = ItoaBuffer::new(); + out.push_str(buf.format(value)); } -fn format_f64_canonical(f: f64) -> String { +fn write_f64_canonical_into(f: f64, out: &mut String) { // Normalize integer-valued floats to integers if f.is_finite() && f.fract() == 0.0 && f.abs() <= i64::MAX as f64 { - return format!("{}", f as i64); + write_i64(out, f as i64); + return; } - let default_format = format!("{f}"); + if !f.is_finite() { + out.push('0'); + return; + } + + let mut buf = RyuBuffer::new(); + let formatted = buf.format(f); // Handle cases where Rust would use exponential notation - if default_format.contains('e') || default_format.contains('E') { - format_without_exponent(f) + if formatted.contains('e') || formatted.contains('E') { + write_without_exponent(f, out); } else { - remove_trailing_zeros(&default_format) + push_trimmed_decimal(formatted, out); } } -fn format_without_exponent(f: f64) -> String { +fn write_without_exponent(f: f64, out: &mut String) { if !f.is_finite() { - return "0".to_string(); + out.push('0'); + return; } if f.abs() >= 1.0 { @@ -44,42 +63,60 @@ fn format_without_exponent(f: f64) -> String { let frac_part = abs_f.fract(); if frac_part == 0.0 { - format!("{}{}", if f < 0.0 { "-" } else { "" }, int_part as i64) + if abs_f <= i64::MAX as f64 { + if f < 0.0 { + out.push('-'); + } + write_i64(out, int_part as i64); + } else { + let result = format!("{f:.0}"); + push_trimmed_decimal(&result, out); + } } else { // High precision to avoid exponent, then trim trailing zeros let result = format!("{f:.17}"); - remove_trailing_zeros(&result) + push_trimmed_decimal(&result, out); } } else if f == 0.0 { - "0".to_string() + out.push('0'); } else { // Small numbers: use high precision to avoid exponent - let result = format!("{f:.17}",); - remove_trailing_zeros(&result) + let result = format!("{f:.17}"); + push_trimmed_decimal(&result, out); } } +#[cfg(test)] fn remove_trailing_zeros(s: &str) -> String { - if !s.contains('.') { + if let Some((int_part, frac_part)) = s.split_once('.') { + let trimmed = frac_part.trim_end_matches('0'); + if trimmed.is_empty() { + int_part.to_string() + } else { + let mut out = String::with_capacity(int_part.len() + 1 + trimmed.len()); + out.push_str(int_part); + out.push('.'); + out.push_str(trimmed); + out + } + } else { // No decimal point, return as-is - return s.to_string(); - } - - let parts: Vec<&str> = s.split('.').collect(); - if parts.len() != 2 { - return s.to_string(); + s.to_string() } +} - let int_part = parts[0]; - let mut frac_part = parts[1].to_string(); - - frac_part = frac_part.trim_end_matches('0').to_string(); - - if frac_part.is_empty() { - // All zeros removed, return as integer - int_part.to_string() +fn push_trimmed_decimal(s: &str, out: &mut String) { + if let Some((int_part, frac_part)) = s.split_once('.') { + let trimmed = frac_part.trim_end_matches('0'); + if trimmed.is_empty() { + out.push_str(int_part); + } else { + out.push_str(int_part); + out.push('.'); + out.push_str(trimmed); + } } else { - format!("{int_part}.{frac_part}") + out.push_str(s); } } diff --git a/src/utils/string.rs b/src/utils/string.rs index 7423d5a..3d1e630 100644 --- a/src/utils/string.rs +++ b/src/utils/string.rs @@ -1,24 +1,26 @@ -use crate::{ - types::Delimiter, - utils::literal, -}; +use crate::{types::Delimiter, utils::literal}; /// Escape special characters in a string for quoted output. pub fn escape_string(s: &str) -> String { let mut result = String::with_capacity(s.len()); + escape_string_into(&mut result, s); + + result +} + +/// Escape special characters in a string and append to the output buffer. +pub fn escape_string_into(out: &mut String, s: &str) { for ch in s.chars() { match ch { - '\n' => result.push_str("\\n"), - '\r' => result.push_str("\\r"), - '\t' => result.push_str("\\t"), - '"' => result.push_str("\\\""), - '\\' => result.push_str("\\\\"), - _ => result.push(ch), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + '"' => out.push_str("\\\""), + '\\' => out.push_str("\\\\"), + _ => out.push(ch), } } - - result } /// Unescape special characters in a quoted string. @@ -92,25 +94,26 @@ pub fn unescape_string(s: &str) -> Result { Ok(result) } -/// Check if a key can be written without quotes (alphanumeric, underscore, -/// dot). -pub fn is_valid_unquoted_key(key: &str) -> bool { - if key.is_empty() { +fn is_valid_unquoted_key_internal(key: &str, allow_hyphen: bool) -> bool { + let bytes = key.as_bytes(); + if bytes.is_empty() { return false; } - let mut chars = key.chars(); - let first = if let Some(c) = chars.next() { - c - } else { - return false; - }; - - if !first.is_alphabetic() && first != '_' { + let first = bytes[0]; + if !first.is_ascii_alphabetic() && first != b'_' { return false; } - chars.all(|c| c.is_alphanumeric() || c == '_' || c == '.') + bytes[1..].iter().all(|b| { + b.is_ascii_alphanumeric() || *b == b'_' || *b == b'.' || (allow_hyphen && *b == b'-') + }) +} + +/// Check if a key can be written without quotes (alphanumeric, underscore, +/// dot). +pub fn is_valid_unquoted_key(key: &str) -> bool { + is_valid_unquoted_key_internal(key, false) } /// Determine if a string needs quoting based on content and delimiter. @@ -123,33 +126,47 @@ pub fn needs_quoting(s: &str, delimiter: char) -> bool { return true; } - if s.chars().any(literal::is_structural_char) { - return true; - } - - if s.contains('\\') || s.contains('"') { - return true; - } + let mut chars = s.chars(); + let first = match chars.next() { + Some(ch) => ch, + None => return true, + }; - if s.contains(delimiter) { + if first.is_whitespace() || first == '-' { return true; } - if s.contains('\n') || s.contains('\r') || s.contains('\t') { + if first == '\\' + || first == '"' + || first == delimiter + || first == '\n' + || first == '\r' + || first == '\t' + || literal::is_structural_char(first) + { return true; } - if s.starts_with(char::is_whitespace) || s.ends_with(char::is_whitespace) { + if first == '0' && chars.clone().next().is_some_and(|c| c.is_ascii_digit()) { return true; } - if s.starts_with('-') { - return true; + let mut last = first; + for ch in chars { + if literal::is_structural_char(ch) + || ch == '\\' + || ch == '"' + || ch == delimiter + || ch == '\n' + || ch == '\r' + || ch == '\t' + { + return true; + } + last = ch; } - // Check for leading zeros (e.g., "05", "007", "0123") - // Numbers with leading zeros must be quoted - if s.starts_with('0') && s.len() > 1 && s.chars().nth(1).is_some_and(|c| c.is_ascii_digit()) { + if last.is_whitespace() { return true; } @@ -158,7 +175,11 @@ pub fn needs_quoting(s: &str, delimiter: char) -> bool { /// Quote and escape a string. pub fn quote_string(s: &str) -> String { - format!("\"{}\"", escape_string(s)) + let mut result = String::with_capacity(s.len() + 2); + result.push('"'); + escape_string_into(&mut result, s); + result.push('"'); + result } pub fn split_by_delimiter(s: &str, delimiter: Delimiter) -> Vec { diff --git a/src/utils/validation.rs b/src/utils/validation.rs index d14167b..f85aee9 100644 --- a/src/utils/validation.rs +++ b/src/utils/validation.rs @@ -1,16 +1,13 @@ use serde_json::Value; -use crate::types::{ - ToonError, - ToonResult, -}; +use crate::types::{ToonError, ToonResult}; /// Validate that nesting depth doesn't exceed the maximum. pub fn validate_depth(depth: usize, max_depth: usize) -> ToonResult<()> { if depth > max_depth { - return Err(ToonError::InvalidStructure( - "Maximum nesting depth of {max_depth} exceeded".to_string(), - )); + return Err(ToonError::InvalidStructure(format!( + "Maximum nesting depth of {max_depth} exceeded" + ))); } Ok(()) } diff --git a/tests/arrays.rs b/tests/arrays.rs index 437fafa..b879f52 100644 --- a/tests/arrays.rs +++ b/tests/arrays.rs @@ -1,13 +1,7 @@ use std::f64; -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode_default, - encode_default, -}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode_default}; #[test] fn test_tabular_arrays() { diff --git a/tests/boundary.rs b/tests/boundary.rs new file mode 100644 index 0000000..9803de7 --- /dev/null +++ b/tests/boundary.rs @@ -0,0 +1,53 @@ +//! Boundary condition tests + +use serde_json::json; +use toon_format::constants::MAX_DEPTH; +use toon_format::{decode, encode, DecodeOptions, EncodeOptions}; + +#[test] +fn test_max_depth_boundary() { + let mut nested = json!(null); + for _ in 0..=MAX_DEPTH { + nested = json!({"a": nested}); + } + + let encoded = encode(&nested, &EncodeOptions::default()); + assert!(encoded.is_ok()); + + let too_deep = json!({"a": nested}); + let result = encode(&too_deep, &EncodeOptions::default()); + assert!(result.is_err()); +} + +#[test] +fn test_large_array() { + let data: Vec = (0..10_000).collect(); + let large = json!(data); + let encoded = encode(&large, &EncodeOptions::default()).unwrap(); + let decoded: serde_json::Value = decode(&encoded, &DecodeOptions::default()).unwrap(); + assert_eq!(large, decoded); +} + +#[test] +fn test_very_long_string() { + let long_string = "x".repeat(100_000); + let value = json!({"data": long_string}); + let encoded = encode(&value, &EncodeOptions::default()).unwrap(); + let decoded: serde_json::Value = decode(&encoded, &DecodeOptions::default()).unwrap(); + assert_eq!(value, decoded); +} + +#[test] +fn test_empty_structures() { + let empty_obj = json!({}); + let empty_arr = json!([]); + + let encoded_obj = encode(&empty_obj, &EncodeOptions::default()).unwrap(); + let encoded_arr = encode(&empty_arr, &EncodeOptions::default()).unwrap(); + + let decoded_obj: serde_json::Value = decode(&encoded_obj, &DecodeOptions::default()).unwrap(); + let decoded_arr: serde_json::Value = decode(&encoded_arr, &DecodeOptions::default()).unwrap(); + + assert_eq!(empty_obj, decoded_obj); + assert_eq!(empty_arr, decoded_arr); +} diff --git a/tests/decode_helpers.rs b/tests/decode_helpers.rs new file mode 100644 index 0000000..a2b2309 --- /dev/null +++ b/tests/decode_helpers.rs @@ -0,0 +1,16 @@ +use serde_json::{json, Value}; +use toon_format::{decode_no_coerce_with_options, decode_strict_with_options, DecodeOptions}; + +#[test] +fn test_decode_strict_with_options_forces_strict() { + let opts = DecodeOptions::new().with_strict(false); + let result: Result = decode_strict_with_options("items[2]: a", &opts); + assert!(result.is_err(), "Strict mode should reject length mismatch"); +} + +#[test] +fn test_decode_no_coerce_with_options_disables_coercion() { + let opts = DecodeOptions::new().with_coerce_types(true); + let result: Value = decode_no_coerce_with_options("value: 123", &opts).unwrap(); + assert_eq!(result, json!({"value": "123"})); +} diff --git a/tests/delimiters.rs b/tests/delimiters.rs index 7d1bb28..dd0ef38 100644 --- a/tests/delimiters.rs +++ b/tests/delimiters.rs @@ -1,14 +1,5 @@ -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode_default, - encode, - encode_default, - Delimiter, - EncodeOptions, -}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode, encode_default, Delimiter, EncodeOptions}; #[test] fn test_delimiter_variants() { @@ -54,7 +45,7 @@ fn test_non_active_delimiters_in_tabular_arrays() { // data Per TOON spec §11: "non-active delimiters MUST NOT cause splits" // Test 1: Pipe character in value when comma is active delimiter (default) - let data = r#"item-list[1]{a,b}: + let data = r#""item-list"[1]{a,b}: ":",| "#; let decoded: Value = decode_default(data).unwrap(); @@ -62,7 +53,7 @@ fn test_non_active_delimiters_in_tabular_arrays() { assert_eq!(decoded["item-list"][0]["b"], "|"); // Test 2: Both values quoted - let data = r#"item-list[1]{a,b}: + let data = r#""item-list"[1]{a,b}: ":","|" "#; let decoded: Value = decode_default(data).unwrap(); @@ -70,13 +61,13 @@ fn test_non_active_delimiters_in_tabular_arrays() { assert_eq!(decoded["item-list"][0]["b"], "|"); // Test 3: Tab character in value when comma is active - let data = "item-list[1]{a,b}:\n \":\",\t\n"; + let data = "\"item-list\"[1]{a,b}:\n \":\",\"\\t\"\n"; let decoded: Value = decode_default(data).unwrap(); assert_eq!(decoded["item-list"][0]["a"], ":"); assert_eq!(decoded["item-list"][0]["b"], "\t"); // Test 4: Comma in value when pipe is active delimiter - should quote the comma - let data = r#"item-list[1|]{a|b}: + let data = r#""item-list"[1|]{a|b}: ":"|"," "#; let decoded: Value = decode_default(data).unwrap(); @@ -106,7 +97,7 @@ fn test_non_active_delimiters_in_inline_arrays() { fn test_delimiter_mismatch_error() { // Per TOON spec §6: delimiter in brackets must match delimiter in braces // This should error: pipe in brackets, comma in braces - let data = r#"item-list[1|]{a,b}: + let data = r#""item-list"[1|]{a,b}: ":",| "#; let result: Result = decode_default(data); diff --git a/tests/encode_helpers.rs b/tests/encode_helpers.rs new file mode 100644 index 0000000..a7d44fb --- /dev/null +++ b/tests/encode_helpers.rs @@ -0,0 +1,49 @@ +use indexmap::IndexMap; +use serde_json::json; +use toon_format::types::{JsonValue, Number}; +use toon_format::{encode_array, encode_object, EncodeOptions, ToonError}; + +#[test] +fn test_encode_array_and_object_with_json() { + let array = json!(["a", "b"]); + let encoded = encode_array(&array, &EncodeOptions::default()).unwrap(); + assert!(encoded.starts_with("[2]:")); + + let object = json!({"a": 1}); + let encoded = encode_object(&object, &EncodeOptions::default()).unwrap(); + assert!(encoded.contains("a: 1")); +} + +#[test] +fn test_encode_array_object_type_mismatch() { + let err = encode_array(json!({"a": 1}), &EncodeOptions::default()).unwrap_err(); + match err { + ToonError::TypeMismatch { expected, found } => { + assert_eq!(expected, "array"); + assert_eq!(found, "object"); + } + _ => panic!("Expected TypeMismatch for encode_array"), + } + + let err = encode_object(json!(["a", "b"]), &EncodeOptions::default()).unwrap_err(); + match err { + ToonError::TypeMismatch { expected, found } => { + assert_eq!(expected, "object"); + assert_eq!(found, "array"); + } + _ => panic!("Expected TypeMismatch for encode_object"), + } +} + +#[test] +fn test_encode_array_object_with_json_value() { + let value = JsonValue::Array(vec![JsonValue::Number(Number::from(1))]); + let encoded = encode_array(value, &EncodeOptions::default()).unwrap(); + assert!(encoded.contains("1")); + + let mut obj = IndexMap::new(); + obj.insert("key".to_string(), JsonValue::Bool(true)); + let value = JsonValue::Object(obj); + let encoded = encode_object(value, &EncodeOptions::default()).unwrap(); + assert!(encoded.contains("key: true")); +} diff --git a/tests/error_context.rs b/tests/error_context.rs new file mode 100644 index 0000000..763e69b --- /dev/null +++ b/tests/error_context.rs @@ -0,0 +1,101 @@ +use std::sync::Arc; + +use toon_format::types::{ErrorContext, ToonError}; + +#[test] +fn test_error_context_inline_rendering() { + let ctx = ErrorContext::new("line 2") + .with_preceding_lines(vec!["line 1".to_string()]) + .with_following_lines(vec!["line 3".to_string()]) + .with_indicator(3) + .with_suggestion("use a colon"); + + let rendered = format!("{ctx}"); + assert!(rendered.contains("line 1")); + assert!(rendered.contains("> line 2")); + assert!(rendered.contains("^")); + assert!(rendered.contains("line 3")); + assert!(rendered.contains("Suggestion: use a colon")); +} + +#[test] +fn test_error_context_lazy_rendering_and_indicator() { + let input: Arc = Arc::from("one\ntwo\nthree"); + let ctx = ErrorContext::from_shared_input(Arc::clone(&input), 2, 2, 1).unwrap(); + + let rendered = format!("{ctx}"); + assert!(rendered.contains("> two")); + assert!(rendered.contains("^")); + assert!(rendered.contains("one")); + assert!(rendered.contains("three")); + + let inline = ctx.with_indicator(2); + let rendered_inline = format!("{inline}"); + assert!(rendered_inline.contains("^")); +} + +#[test] +fn test_error_context_invalid_line_returns_none() { + let input: Arc = Arc::from("line 1"); + assert!(ErrorContext::from_shared_input(Arc::clone(&input), 0, 1, 1).is_none()); + assert!(ErrorContext::from_shared_input(Arc::clone(&input), 2, 1, 1).is_none()); +} + +#[test] +fn test_toon_error_helpers() { + let err = ToonError::invalid_char('@', 4); + match err { + ToonError::InvalidCharacter { char, position } => { + assert_eq!(char, '@'); + assert_eq!(position, 4); + } + _ => panic!("Expected InvalidCharacter error"), + } + + let err = ToonError::type_mismatch("string", "number"); + match err { + ToonError::TypeMismatch { expected, found } => { + assert_eq!(expected, "string"); + assert_eq!(found, "number"); + } + _ => panic!("Expected TypeMismatch error"), + } + + let ctx = ErrorContext::new("bad input"); + let err = ToonError::parse_error(1, 2, "oops").with_context(ctx.clone()); + match err { + ToonError::ParseError { + context: Some(context), + .. + } => { + assert_eq!(context, Box::new(ctx.clone())); + } + _ => panic!("Expected ParseError with context"), + } + + let err = ToonError::length_mismatch(2, 1).with_context(ctx.clone()); + match err { + ToonError::LengthMismatch { + context: Some(context), + .. + } => { + assert_eq!(context, Box::new(ctx)); + } + _ => panic!("Expected LengthMismatch with context"), + } + + let err = ToonError::InvalidInput("nope".to_string()); + let untouched = err.clone().with_context(ErrorContext::new("unused")); + assert_eq!(err, untouched); + + let err = ToonError::parse_error(1, 1, "bad").with_suggestion("fix it"); + match err { + ToonError::ParseError { + context: Some(context), + .. + } => { + assert_eq!(context.suggestion.as_deref(), Some("fix it")); + } + _ => panic!("Expected ParseError with suggestion"), + } +} diff --git a/tests/errors.rs b/tests/errors.rs index 935b567..6df038e 100644 --- a/tests/errors.rs +++ b/tests/errors.rs @@ -1,14 +1,5 @@ -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode, - decode_default, - decode_strict, - DecodeOptions, - ToonError, -}; +use serde_json::{json, Value}; +use toon_format::{decode, decode_default, decode_strict, DecodeOptions, ToonError}; #[test] fn test_invalid_syntax_errors() { @@ -280,12 +271,12 @@ fn test_no_coercion_preserves_strings() { assert_eq!(result["value"], json!("true")); let result = decode::("value: 123", &opts).unwrap(); - assert!(result["value"].is_number()); - assert_eq!(result["value"], json!(123)); + assert!(result["value"].is_string()); + assert_eq!(result["value"], json!("123")); let result = decode::("value: true", &opts).unwrap(); - assert!(result["value"].is_boolean()); - assert_eq!(result["value"], json!(true)); + assert!(result["value"].is_string()); + assert_eq!(result["value"], json!("true")); } #[test] @@ -456,11 +447,7 @@ fn test_error_context_information() { ToonError::ParseError { context: Some(ctx), .. } => { - println!( - "Error context has {} preceding lines, {} following lines", - ctx.preceding_lines.len(), - ctx.following_lines.len() - ); + println!("Parse error context:\n{ctx}"); } ToonError::LengthMismatch { context: Some(ctx), .. diff --git a/tests/numeric.rs b/tests/numeric.rs index 18f4679..432c43c 100644 --- a/tests/numeric.rs +++ b/tests/numeric.rs @@ -1,13 +1,7 @@ use core::f64; -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode_default, - encode_default, -}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode_default}; #[test] fn test_numeric_edge_cases() { diff --git a/tests/objects.rs b/tests/objects.rs index f73446b..7709b74 100644 --- a/tests/objects.rs +++ b/tests/objects.rs @@ -1,11 +1,5 @@ -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode_default, - encode_default, -}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode_default}; #[test] fn test_special_characters_and_quoting() { diff --git a/tests/options.rs b/tests/options.rs new file mode 100644 index 0000000..a44d8da --- /dev/null +++ b/tests/options.rs @@ -0,0 +1,43 @@ +use toon_format::types::{DecodeOptions, EncodeOptions, Indent, KeyFoldingMode, PathExpansionMode}; +use toon_format::Delimiter; + +#[test] +fn test_indent_helpers() { + let indent = Indent::Spaces(2); + assert_eq!(indent.get_string(0), ""); + assert_eq!(indent.get_string(3).len(), 6); + assert_eq!(indent.get_spaces(), 2); + + let indent = Indent::Spaces(0); + assert_eq!(indent.get_string(2), ""); +} + +#[test] +fn test_encode_options_setters() { + let opts = EncodeOptions::new() + .with_delimiter(Delimiter::Pipe) + .with_key_folding(KeyFoldingMode::Safe) + .with_flatten_depth(2) + .with_spaces(4); + + assert_eq!(opts.delimiter, Delimiter::Pipe); + assert_eq!(opts.key_folding, KeyFoldingMode::Safe); + assert_eq!(opts.flatten_depth, 2); + assert_eq!(opts.indent, Indent::Spaces(4)); +} + +#[test] +fn test_decode_options_setters() { + let opts = DecodeOptions::new() + .with_strict(false) + .with_delimiter(Delimiter::Pipe) + .with_coerce_types(false) + .with_indent(Indent::Spaces(4)) + .with_expand_paths(PathExpansionMode::Safe); + + assert!(!opts.strict); + assert_eq!(opts.delimiter, Some(Delimiter::Pipe)); + assert!(!opts.coerce_types); + assert_eq!(opts.indent, Indent::Spaces(4)); + assert_eq!(opts.expand_paths, PathExpansionMode::Safe); +} diff --git a/tests/panic_safety.rs b/tests/panic_safety.rs new file mode 100644 index 0000000..8630670 --- /dev/null +++ b/tests/panic_safety.rs @@ -0,0 +1,27 @@ +//! Tests to verify panic safety of JsonValue operations + +use toon_format::types::JsonValue; + +#[test] +fn test_get_missing_key_returns_none() { + let obj = JsonValue::Object(Default::default()); + assert!(obj.get("nonexistent").is_none()); +} + +#[test] +fn test_get_on_non_object_returns_none() { + let arr = JsonValue::Array(vec![]); + assert!(arr.get("key").is_none()); +} + +#[test] +fn test_get_index_out_of_bounds_returns_none() { + let arr = JsonValue::Array(vec![]); + assert!(arr.get_index(0).is_none()); +} + +#[test] +fn test_get_index_on_non_array_returns_none() { + let obj = JsonValue::Object(Default::default()); + assert!(obj.get_index(0).is_none()); +} diff --git a/tests/parser_errors.rs b/tests/parser_errors.rs new file mode 100644 index 0000000..1353d8b --- /dev/null +++ b/tests/parser_errors.rs @@ -0,0 +1,81 @@ +use serde_json::Value; +use toon_format::{decode, decode_strict, DecodeOptions, Delimiter}; + +#[test] +fn test_strict_rejects_multiple_root_values() { + let err = decode_strict::("hello\nworld").unwrap_err(); + assert!(err + .to_string() + .contains("Multiple values at root level are not allowed")); +} + +#[test] +fn test_strict_invalid_unquoted_key() { + let err = decode_strict::("bad-key: 1").unwrap_err(); + assert!(err.to_string().contains("Invalid unquoted key")); +} + +#[test] +fn test_strict_missing_colon_in_object() { + let err = decode_strict::("obj:\n key").unwrap_err(); + assert!(err + .to_string() + .contains("Expected ':' after 'key' in object context")); +} + +#[test] +fn test_array_header_hash_marker_rejected() { + let err = decode_strict::("items[#2]: a,b").unwrap_err(); + assert!(err + .to_string() + .contains("Length marker '#' is not supported")); +} + +#[test] +fn test_array_header_missing_right_bracket() { + let err = decode_strict::("items[1|: a|b").unwrap_err(); + assert!(err.to_string().contains("Expected ']'")); +} + +#[test] +fn test_tabular_header_requires_newline() { + let err = decode_strict::("items[1]{a}: 1").unwrap_err(); + assert!(err + .to_string() + .contains("Expected newline after tabular array header")); +} + +#[test] +fn test_tabular_row_missing_delimiter() { + let err = decode_strict::("items[1]{a,b}:\n 1 2").unwrap_err(); + assert!(err.to_string().contains("Expected delimiter")); +} + +#[test] +fn test_tabular_blank_line_strict() { + let err = decode_strict::("items[2]{a}:\n 1\n\n 2").unwrap_err(); + assert!(err + .to_string() + .contains("Blank lines are not allowed inside tabular arrays")); +} + +#[test] +fn test_inline_array_missing_delimiter_strict() { + let err = decode_strict::("items[2]: a b").unwrap_err(); + assert!(err.to_string().contains("Expected delimiter")); +} + +#[test] +fn test_list_array_blank_line_strict() { + let err = decode_strict::("items[2]:\n - a\n\n - b").unwrap_err(); + assert!(err + .to_string() + .contains("Blank lines are not allowed inside list arrays")); +} + +#[test] +fn test_array_header_delimiter_mismatch() { + let opts = DecodeOptions::new().with_delimiter(Delimiter::Pipe); + let err = decode::("items[2,]: a,b", &opts).unwrap_err(); + assert!(err.to_string().contains("Detected delimiter")); +} diff --git a/tests/real_world.rs b/tests/real_world.rs index d47471f..708df22 100644 --- a/tests/real_world.rs +++ b/tests/real_world.rs @@ -1,11 +1,5 @@ -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode_default, - encode_default, -}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode_default}; #[test] fn test_real_world_github_data() { diff --git a/tests/round_trip.rs b/tests/round_trip.rs index 3125276..c40dea0 100644 --- a/tests/round_trip.rs +++ b/tests/round_trip.rs @@ -1,13 +1,7 @@ use std::f64; -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode_default, - encode_default, -}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode_default}; #[test] fn test_comprehensive_round_trips() { diff --git a/tests/scanner_errors.rs b/tests/scanner_errors.rs new file mode 100644 index 0000000..a4d727e --- /dev/null +++ b/tests/scanner_errors.rs @@ -0,0 +1,68 @@ +use toon_format::decode::scanner::{Scanner, Token}; +use toon_format::ToonError; + +#[test] +fn test_tabs_in_indentation_rejected() { + let mut scanner = Scanner::new("\tkey: value"); + let err = scanner.scan_token().unwrap_err(); + assert!(err + .to_string() + .contains("Tabs are not allowed in indentation")); +} + +#[test] +fn test_scan_quoted_string_invalid_escape() { + let mut scanner = Scanner::new(r#""bad\x""#); + let err = scanner.scan_token().unwrap_err(); + assert!(err.to_string().contains("Invalid escape sequence")); +} + +#[test] +fn test_scan_quoted_string_unterminated() { + let mut scanner = Scanner::new("\"unterminated"); + let err = scanner.scan_token().unwrap_err(); + assert!(matches!(err, ToonError::UnexpectedEof)); +} + +#[test] +fn test_parse_value_string_invalid_escape() { + let scanner = Scanner::new(""); + let err = scanner.parse_value_string(r#""bad\x""#).unwrap_err(); + assert!(err.to_string().contains("Invalid escape sequence")); +} + +#[test] +fn test_parse_value_string_unexpected_trailing_chars() { + let scanner = Scanner::new(""); + let err = scanner + .parse_value_string(r#""hello" trailing"#) + .unwrap_err(); + assert!(err + .to_string() + .contains("Unexpected characters after closing quote")); +} + +#[test] +fn test_parse_value_string_unterminated() { + let scanner = Scanner::new(""); + let err = scanner.parse_value_string(r#""missing"#).unwrap_err(); + assert!(err.to_string().contains("Unterminated string")); +} + +#[test] +fn test_scan_number_leading_zero_string() { + let mut scanner = Scanner::new("05"); + assert_eq!( + scanner.scan_token().unwrap(), + Token::String("05".to_string(), false) + ); +} + +#[test] +fn test_scan_number_trailing_char_string() { + let mut scanner = Scanner::new("1x"); + assert_eq!( + scanner.scan_token().unwrap(), + Token::String("1".to_string(), false) + ); +} diff --git a/tests/serde_api.rs b/tests/serde_api.rs new file mode 100644 index 0000000..a754cb5 --- /dev/null +++ b/tests/serde_api.rs @@ -0,0 +1,68 @@ +use std::io::Cursor; + +use serde::{Deserialize, Serialize}; +use serde_json::json; +use toon_format::{ + from_reader, from_slice, from_str, from_str_with_options, to_string, to_string_with_options, + to_vec, to_writer, DecodeOptions, Delimiter, EncodeOptions, ToonError, +}; + +#[derive(Debug, Serialize, Deserialize, PartialEq)] +struct User { + name: String, + age: u32, +} + +#[test] +fn test_round_trip_string_api() { + let user = User { + name: "Ada".to_string(), + age: 37, + }; + let encoded = to_string(&user).unwrap(); + let decoded: User = from_str(&encoded).unwrap(); + assert_eq!(decoded, user); +} + +#[test] +fn test_writer_reader_round_trip() { + let user = User { + name: "Turing".to_string(), + age: 41, + }; + let mut buffer = Vec::new(); + to_writer(&mut buffer, &user).unwrap(); + + let mut reader = Cursor::new(buffer); + let decoded: User = from_reader(&mut reader).unwrap(); + assert_eq!(decoded, user); +} + +#[test] +fn test_options_wiring() { + let data = json!({"items": ["a", "b"]}); + let opts = EncodeOptions::new().with_delimiter(Delimiter::Pipe); + let encoded = to_string_with_options(&data, &opts).unwrap(); + assert!(encoded.contains('|')); + + let decode_opts = DecodeOptions::new().with_strict(false); + let decoded: serde_json::Value = from_str_with_options("items[2]: a", &decode_opts).unwrap(); + assert_eq!(decoded, json!({"items": ["a"]})); +} + +#[test] +fn test_vec_and_slice_api() { + let user = User { + name: "Grace".to_string(), + age: 60, + }; + let bytes = to_vec(&user).unwrap(); + let decoded: User = from_slice(&bytes).unwrap(); + assert_eq!(decoded, user); +} + +#[test] +fn test_from_slice_invalid_utf8() { + let err = from_slice::(&[0xff]).unwrap_err(); + assert!(matches!(err, ToonError::InvalidInput(_))); +} diff --git a/tests/spec_edge_cases.rs b/tests/spec_edge_cases.rs new file mode 100644 index 0000000..d494562 --- /dev/null +++ b/tests/spec_edge_cases.rs @@ -0,0 +1,66 @@ +//! Spec compliance edge cases + +use serde_json::json; +use toon_format::types::PathExpansionMode; +use toon_format::{decode, decode_default, DecodeOptions}; + +#[test] +fn test_keyword_keys_allowed() { + let input = "true: 1\nfalse: 2\nnull: 3"; + let result: serde_json::Value = decode_default(input).unwrap(); + assert_eq!(result, json!({"true": 1, "false": 2, "null": 3})); +} + +#[test] +fn test_nested_array_delimiter_scoping() { + let input = "outer[2|]: [2]: a,b | [2]: c,d"; + let result: serde_json::Value = decode_default(input).unwrap(); + assert_eq!(result, json!({"outer": [["a", "b"], ["c", "d"]]})); +} + +#[test] +fn test_quoted_dotted_field_not_expanded() { + let input = "rows[1]{\"a.b\"}:\n 1"; + let opts = DecodeOptions::new().with_expand_paths(PathExpansionMode::Safe); + let result: serde_json::Value = decode(input, &opts).unwrap(); + assert_eq!(result, json!({"rows": [{"a.b": 1}]})); +} + +#[test] +fn test_negative_leading_zero_string() { + let input = "val: -05"; + let result: serde_json::Value = decode_default(input).unwrap(); + assert_eq!(result, json!({"val": "-05"})); +} + +#[test] +fn test_field_list_delimiter_mismatch_strict() { + let input = "items[1|]{a,b}:\n 1,2"; + let opts = DecodeOptions::new().with_strict(true); + assert!(decode::(input, &opts).is_err()); +} + +#[test] +fn test_unquoted_tab_rejected_in_strict() { + let input = "val: a\tb"; + let result: Result = decode_default(input); + assert!(result.is_err()); +} + +#[test] +fn test_multiple_spaces_preserved() { + let input = "msg: hello world"; + let result: serde_json::Value = decode_default(input).unwrap(); + assert_eq!(result, json!({"msg": "hello world"})); +} + +#[test] +fn test_coerce_types_toggle() { + let input = "value: 123\nflag: true\nnone: null"; + let opts = DecodeOptions::new().with_coerce_types(false); + let result: serde_json::Value = decode(input, &opts).unwrap(); + assert_eq!( + result, + json!({"value": "123", "flag": "true", "none": "null"}) + ); +} diff --git a/tests/spec_fixtures.rs b/tests/spec_fixtures.rs index 17c18f7..7d4a308 100644 --- a/tests/spec_fixtures.rs +++ b/tests/spec_fixtures.rs @@ -2,16 +2,8 @@ use datatest_stable::Utf8Path; use serde::Deserialize; use serde_json::Value; use toon_format::{ - decode, - encode, - types::{ - DecodeOptions, - Delimiter, - EncodeOptions, - Indent, - KeyFoldingMode, - PathExpansionMode, - }, + decode, encode, + types::{DecodeOptions, Delimiter, EncodeOptions, Indent, KeyFoldingMode, PathExpansionMode}, }; #[derive(Deserialize, Debug)] diff --git a/tests/strict_mode.rs b/tests/strict_mode.rs new file mode 100644 index 0000000..f16850c --- /dev/null +++ b/tests/strict_mode.rs @@ -0,0 +1,67 @@ +use serde_json::json; +use toon_format::{decode, DecodeOptions}; + +#[test] +fn test_negative_array_length_rejected() { + let input = "items[-1]:"; + let opts = DecodeOptions::new().with_strict(true); + let result = decode::(input, &opts); + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("non-negative")); +} + +#[test] +fn test_float_array_length_rejected() { + let input = "items[3.5]:"; + let opts = DecodeOptions::new().with_strict(true); + let result = decode::(input, &opts); + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("integer")); +} + +#[test] +fn test_mixed_delimiters_rejected_in_strict_mode() { + let input = "items[3]: a,b|c"; + let opts = DecodeOptions::new().with_strict(true); + let result = decode::(input, &opts); + + assert!(result.is_err()); +} + +#[test] +fn test_length_mismatch_allowed_in_non_strict_inline() { + let input = "items[1]: a,b"; + let opts = DecodeOptions::new().with_strict(false); + let result = decode::(input, &opts).unwrap(); + + assert_eq!(result["items"], json!(["a", "b"])); +} + +#[test] +fn test_length_mismatch_allowed_in_non_strict_list() { + let input = "items[1]:\n - 1\n - 2"; + let opts = DecodeOptions::new().with_strict(false); + let result = decode::(input, &opts).unwrap(); + + assert_eq!(result["items"], json!([1, 2])); +} + +#[test] +fn test_tab_indentation_allowed_in_non_strict_mode() { + let input = "items[1]:\n\t- 1"; + let opts = DecodeOptions::new().with_strict(false); + let result = decode::(input, &opts).unwrap(); + + assert_eq!(result["items"], json!([1])); +} + +#[test] +fn test_unquoted_key_rejected_in_strict_mode() { + let input = "bad-key: 1"; + let opts = DecodeOptions::new().with_strict(true); + let result = decode::(input, &opts); + + assert!(result.is_err()); +} diff --git a/tests/unicode.rs b/tests/unicode.rs index 9c7f35d..447e709 100644 --- a/tests/unicode.rs +++ b/tests/unicode.rs @@ -1,11 +1,5 @@ -use serde_json::{ - json, - Value, -}; -use toon_format::{ - decode_default, - encode_default, -}; +use serde_json::{json, Value}; +use toon_format::{decode_default, encode_default}; #[test] fn test_unicode_strings() { diff --git a/tests/value.rs b/tests/value.rs new file mode 100644 index 0000000..872fd0d --- /dev/null +++ b/tests/value.rs @@ -0,0 +1,161 @@ +use std::panic::{catch_unwind, AssertUnwindSafe}; + +use indexmap::IndexMap; +use serde_json::json; +use toon_format::types::{IntoJsonValue, JsonValue, Number}; + +#[test] +fn test_number_from_f64_rejects_non_finite() { + assert!(Number::from_f64(f64::NAN).is_none()); + assert!(Number::from_f64(f64::INFINITY).is_none()); + assert!(Number::from_f64(f64::NEG_INFINITY).is_none()); + assert!(Number::from_f64(1.5).is_some()); +} + +#[test] +fn test_number_integer_checks() { + let float_int = Number::Float(42.0); + assert!(float_int.is_i64()); + assert!(float_int.is_u64()); + + let float_frac = Number::Float(42.5); + assert!(!float_frac.is_i64()); + assert!(!float_frac.is_u64()); + + let float_max = Number::Float(i64::MAX as f64); + assert!(!float_max.is_i64()); + assert_eq!(float_max.as_i64(), Some(i64::MAX)); + + let float_neg = Number::Float(-1.0); + assert!(!float_neg.is_u64()); +} + +#[test] +fn test_number_as_conversions() { + let too_large = Number::PosInt(i64::MAX as u64 + 1); + assert_eq!(too_large.as_i64(), None); + + let neg = Number::NegInt(-5); + assert_eq!(neg.as_u64(), None); + + let float_exact = Number::Float(7.0); + assert_eq!(float_exact.as_i64(), Some(7)); + assert_eq!(float_exact.as_u64(), Some(7)); + + let float_frac = Number::Float(7.25); + assert_eq!(float_frac.as_i64(), None); + assert_eq!(float_frac.as_u64(), None); + + let float_nan = Number::Float(f64::NAN); + assert!(!float_nan.is_integer()); +} + +#[test] +fn test_number_display_nan() { + let value = Number::from(f64::NAN); + assert_eq!(format!("{value}"), "0"); +} + +#[test] +fn test_json_value_accessors_and_take() { + let mut obj = IndexMap::new(); + obj.insert("a".to_string(), JsonValue::Number(Number::from(1))); + + let mut value = JsonValue::Object(obj); + assert!(value.is_object()); + assert_eq!(value.type_name(), "object"); + assert_eq!(value.get("a").and_then(JsonValue::as_i64), Some(1)); + + value + .as_object_mut() + .unwrap() + .insert("b".to_string(), JsonValue::String("hi".to_string())); + assert_eq!(value.get("b").and_then(JsonValue::as_str), Some("hi")); + + let mut arr = JsonValue::Array(vec![JsonValue::Bool(true)]); + assert!(arr.is_array()); + arr.as_array_mut().unwrap().push(JsonValue::Null); + assert_eq!(arr.as_array().unwrap().len(), 2); + + let mut taken = JsonValue::String("take".to_string()); + let prior = taken.take(); + assert!(matches!(taken, JsonValue::Null)); + assert_eq!(prior.as_str(), Some("take")); +} + +#[test] +fn test_json_value_indexing_success() { + let mut arr = JsonValue::Array(vec![JsonValue::Number(Number::from(1)), JsonValue::Null]); + assert_eq!(arr[0].as_i64(), Some(1)); + arr[1] = JsonValue::Bool(true); + assert_eq!(arr[1].as_bool(), Some(true)); + + let mut obj = IndexMap::new(); + obj.insert("key".to_string(), JsonValue::Bool(false)); + let mut value = JsonValue::Object(obj); + + assert_eq!(value["key"].as_bool(), Some(false)); + value["key"] = JsonValue::Bool(true); + assert_eq!(value["key"].as_bool(), Some(true)); + + let owned_key = "key".to_string(); + assert_eq!(value[owned_key].as_bool(), Some(true)); +} + +#[test] +fn test_json_value_indexing_panics() { + let value = JsonValue::Null; + let err = catch_unwind(AssertUnwindSafe(|| { + let _ = &value["missing"]; + })); + assert!(err.is_err()); + + let empty_array = JsonValue::Array(Vec::new()); + let err = catch_unwind(AssertUnwindSafe(|| { + let _ = &empty_array[1]; + })); + assert!(err.is_err()); + + let mut not_array = JsonValue::Null; + let err = catch_unwind(AssertUnwindSafe(|| { + not_array[0] = JsonValue::Null; + })); + assert!(err.is_err()); + + let empty_object = JsonValue::Object(IndexMap::new()); + let err = catch_unwind(AssertUnwindSafe(|| { + let _ = &empty_object["absent"]; + })); + assert!(err.is_err()); +} + +#[test] +fn test_json_value_conversions() { + let json_value = json!({"a": [1, 2], "b": {"c": true}}); + let value = JsonValue::from(json_value.clone()); + let roundtrip: serde_json::Value = value.clone().into(); + assert_eq!(roundtrip, json_value); + + let nan_value = JsonValue::Number(Number::Float(f64::NAN)); + let json_nan: serde_json::Value = nan_value.into(); + assert_eq!(json_nan, json!(null)); +} + +#[test] +fn test_into_json_value_trait() { + let json_value = json!({"a": 1}); + let owned = json_value.into_json_value(); + assert_eq!(owned.get("a").and_then(JsonValue::as_i64), Some(1)); + + let json_value = json!({"b": true}); + let borrowed = (&json_value).into_json_value(); + assert_eq!(borrowed.get("b").and_then(JsonValue::as_bool), Some(true)); + + let value = JsonValue::Bool(false); + let cloned = value.into_json_value(); + assert!(matches!(cloned, JsonValue::Bool(false))); + + let value = JsonValue::Bool(true); + let borrowed = (&value).into_json_value(); + assert!(matches!(borrowed, JsonValue::Bool(true))); +}