Skip to content

Commit

Permalink
Implement string splitting via division.
Browse files Browse the repository at this point in the history
  • Loading branch information
01mf02 committed Mar 13, 2024
1 parent 7c20ee9 commit 6cb7ed2
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 26 deletions.
16 changes: 0 additions & 16 deletions jaq-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,18 +167,6 @@ fn as_codepoint(v: &Val) -> Result<char, Error> {
char::from_u32(u).ok_or_else(|| Error::str(format_args!("cannot use {u} as character")))
}

/// Cut `s` into pieces at each occurrence of `sep`, turning every piece into a value via `Val::str`.
///
/// When `sep` is empty, every character of `s` becomes its own piece.
fn split(s: &str, sep: &str) -> Vec<Val> {
    if !sep.is_empty() {
        return s.split(sep).map(|part| Val::str(part.to_string())).collect();
    }
    // `str::split` with an empty separator ("") would additionally yield
    // an empty string before the first and after the last character,
    // so we walk over the characters instead
    s.chars().map(|c| Val::str(c.to_string())).collect()
}

/// This implements a ~10x faster version of:
/// ~~~ text
/// def range($from; $to; $by): $from |
Expand Down Expand Up @@ -340,10 +328,6 @@ const CORE_RUN: &[(&str, usize, RunPtr)] = &[
let f = move |v| indices(&cv.1, &v?).map(|idxs| Val::arr(idxs.map(to_int).collect()));
Box::new(vals.map(f))
}),
("split", 1, |args, cv| {
let seps = args.get(0).run(cv.clone());
Box::new(seps.map(move |sep| Ok(Val::arr(split(cv.1.as_str()?, sep?.as_str()?)))))
}),
("first", 1, |args, cv| Box::new(args.get(0).run(cv).take(1))),
("limit", 2, |args, cv| {
let n = args.get(0).run(cv.clone()).map(|n| n?.as_int());
Expand Down
10 changes: 0 additions & 10 deletions jaq-core/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,16 +307,6 @@ fn round() {
fail(json!({}), "round", err(json!({})));
}

#[test]
fn split() {
    // an empty separator splits the input into its characters, including multi-byte ones
    let chars = json!(["a", "ö", "ß"]);
    give(json!("aöß"), r#"split("")"#, chars);

    // a separator at the very start or end of the input gives rise to an empty string
    let parts = json!(["", "c", "cd", ""]);
    give(json!("abcabcdab"), r#"split("ab")"#, parts);
}

#[test]
fn startswith() {
give(json!("foobar"), r#"startswith("")"#, json!(true));
Expand Down
15 changes: 15 additions & 0 deletions jaq-interpret/src/val.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,20 @@ impl core::ops::Mul for Val {
}
}

/// Break `s` into the pieces delimited by `sep`, yielding each piece as an owned `String`.
///
/// An empty `s` yields no pieces at all, and
/// an empty `sep` yields every character of `s` as its own piece.
fn split<'a>(s: &'a str, sep: &'a str) -> Box<dyn Iterator<Item = String> + 'a> {
    match (s.is_empty(), sep.is_empty()) {
        // there is nothing to split
        (true, _) => Box::new(core::iter::empty()),
        // `str::split` with an empty separator ("") would additionally yield
        // an empty string before the first and after the last character,
        // so we walk over the characters instead
        (false, true) => Box::new(s.chars().map(|c| c.to_string())),
        (false, false) => Box::new(s.split(sep).map(|piece| piece.to_string())),
    }
}

impl core::ops::Div for Val {
type Output = ValR;
fn div(self, rhs: Self) -> Self::Output {
Expand All @@ -426,6 +440,7 @@ impl core::ops::Div for Val {
(Float(x), Float(y)) => Ok(Float(x / y)),
(Num(n), r) => Self::from_dec_str(&n) / r,
(l, Num(n)) => l / Self::from_dec_str(&n),
(Str(x), Str(y)) => Ok(Val::arr(split(&x, &y).map(Val::str).collect())),
(l, r) => Err(Error::MathOp(l, MathOp::Div, r)),
}
}
Expand Down
5 changes: 5 additions & 0 deletions jaq-interpret/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ fn mul() {
);
}

// Dividing a string by a string splits the former at every occurrence of the latter.
// A separator at the very start or end of the input gives rise to an empty string.
yields!(div_str, r#""abcabcdab" / "ab""#, ["", "c", "cd", ""]);
// An empty input string yields an empty array, regardless of the separator.
yields!(div_str_empty, r#""" / """#, json!([]));
yields!(div_str_empty_str, r#""" / "ab""#, json!([]));
// An empty separator splits the input into its characters, including multi-byte ones.
yields!(div_str_empty_sep, r#""aöß" / """#, ["a", "ö", "ß"]);

#[test]
fn logic() {
let tf = json!([true, false]);
Expand Down
3 changes: 3 additions & 0 deletions jaq-std/src/std.jq
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ def scan(re; flags): matches(re; flags)[] | .[0].string;
def match(re; flags): matches(re; flags)[] | .[0] + { captures: .[1:] };
def capture(re; flags): matches(re; flags)[] | capture_of_match;

# Split the input string at every occurrence of the string `$sep`.
# Fail if the input or `$sep` is not a string.
def split($sep):
  if (isstring | not) or ($sep | isstring | not)
  then error("split input and separator must be strings")
  else . / $sep end;
def split (re; flags): split_(re; flags + "g");
def splits(re; flags): split(re; flags)[];

Expand Down

0 comments on commit 6cb7ed2

Please sign in to comment.