Skip to content

Commit

Permalink
feat: add more text api (#398)
Browse files Browse the repository at this point in the history
* feat: add char_at

* feat: add slice

* feat: add splice

* feat: add rust/typescript API

* fix: ci test

* fix: patch

* fix: patch

* Update crates/loro-wasm/src/lib.rs

Co-authored-by: Zixuan Chen <[email protected]>

* Update crates/loro-wasm/src/lib.rs

Co-authored-by: Zixuan Chen <[email protected]>

* Update crates/loro/src/lib.rs

Co-authored-by: Zixuan Chen <[email protected]>

* perf: use entity index

* fix: patch

* fix: error brackets

---------

Co-authored-by: Zixuan Chen <[email protected]>
  • Loading branch information
Lampese and zxch3n authored Jul 14, 2024
1 parent 2f95480 commit c710ec3
Show file tree
Hide file tree
Showing 8 changed files with 367 additions and 3 deletions.
6 changes: 4 additions & 2 deletions crates/loro-common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,12 @@ pub enum LoroError {
UndoWithDifferentPeerId { expected: PeerID, actual: PeerID },
#[error("The input JSON schema is invalid")]
InvalidJsonSchema,
#[error("Cannot insert or delete utf-8 in the middle of the codepoint in Unicode.")]
#[error("Cannot insert or delete utf-8 in the middle of the codepoint in Unicode")]
UTF8InUnicodeCodePoint { pos: usize },
#[error("Cannot insert or delete utf-16 in the middle of the codepoint in Unicode.")]
#[error("Cannot insert or delete utf-16 in the middle of the codepoint in Unicode")]
UTF16InUnicodeCodePoint { pos: usize },
#[error("The end index cannot be less than the start index")]
EndIndexLessThanStartIndex { start: usize, end: usize },
}

#[derive(Error, Debug)]
Expand Down
80 changes: 80 additions & 0 deletions crates/loro-internal/src/container/richtext/richtext_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,17 @@ impl CanRemove for RichtextStateChunk {
}
}

/// Returns the part of `s` covered by the Unicode scalar values in
/// `start_index..end_index`.
///
/// Both indices count `char`s, not bytes. An index equal to the number of
/// chars in `s` is valid and denotes the end of the string.
///
/// # Errors
///
/// Returns `Err(())` when either index lies past the end of `s`, or when
/// `start_index > end_index`.
fn unicode_slice(s: &str, start_index: usize, end_index: usize) -> Result<&str, ()> {
    // A reversed range would otherwise panic when slicing below.
    if start_index > end_index {
        return Err(());
    }

    let mut start = None;
    let mut end = None;
    // Every char boundary in order, followed by the one-past-the-end boundary,
    // so both indices are resolved in a single scan of the string.
    let boundaries = s
        .char_indices()
        .map(|(byte_index, _)| byte_index)
        .chain(std::iter::once(s.len()));
    for (unicode_index, byte_index) in boundaries.enumerate() {
        if unicode_index == start_index {
            start = Some(byte_index);
        }
        if unicode_index == end_index {
            end = Some(byte_index);
            // `start_index <= end_index`, so `start` is already resolved here.
            break;
        }
    }

    match (start, end) {
        (Some(start), Some(end)) => Ok(&s[start..end]),
        _ => Err(()),
    }
}

pub(crate) fn unicode_to_utf8_index(s: &str, unicode_index: usize) -> Option<usize> {
let mut current_unicode_index = 0;
for (byte_index, _) in s.char_indices() {
Expand Down Expand Up @@ -1620,6 +1631,27 @@ impl RichtextState {
self.style_ranges.as_mut().unwrap()
}

/// Looks up the character stored at event index `pos`.
///
/// The cursor returned by the `EventIndexQuery` lookup is used as a `char`
/// offset into the text of the leaf it points at.
///
/// - Without the `wasm` feature the `char` at that offset is returned.
/// - With the `wasm` feature the UTF-16 code unit at the matching UTF-16
///   offset is returned as a `char`.
///
/// # Errors
///
/// Returns `Err(())` when the leaf cannot be found, when the offset is past
/// the end of the leaf's text, or (wasm only) when the code unit is a
/// surrogate and therefore cannot be represented as a `char`.
///
/// # Panics
///
/// Panics if the query finds nothing for `pos` or if the leaf has no string
/// content; callers are expected to bounds-check `pos` first.
pub(crate) fn get_char_by_event_index(&self, pos: usize) -> Result<char, ()> {
    let cursor = self.tree.query::<EventIndexQuery>(&pos).unwrap().cursor;
    let Some(chunk) = &self.tree.get_elem(cursor.leaf) else {
        return Err(());
    };
    let s = chunk.as_str().unwrap();
    if cfg!(not(feature = "wasm")) {
        s.chars().nth(cursor.offset).ok_or(())
    } else {
        let utf16offset = unicode_to_utf16_index(s, cursor.offset).unwrap();
        // A lone surrogate (half of a non-BMP char) is not a valid `char`;
        // report it as an error instead of panicking.
        s.encode_utf16()
            .nth(utf16offset)
            .and_then(|unit| std::char::from_u32(u32::from(unit)))
            .ok_or(())
    }
}

/// Find the best insert position based on algorithm similar to Peritext.
/// The result is only different from `query` when there are style anchors around the insert pos.
/// Returns the right neighbor of the insert pos and the entity index.
Expand Down Expand Up @@ -1868,6 +1900,54 @@ impl RichtextState {
Ok(ans)
}

pub(crate) fn get_text_slice_by_event_index(
&self,
pos: usize,
len: usize,
) -> LoroResult<String> {
if self.tree.is_empty() {
return Ok(String::new());
}

if len == 0 {
return Ok(String::new());
}

if pos + len > self.len_event() {
return Err(LoroError::OutOfBound {
pos: pos + len,
len: self.len_event(),
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
});
}

let mut ans = String::new();
let (start, end) = (
self.tree.query::<EventIndexQuery>(&pos).unwrap().cursor,
self.tree
.query::<EventIndexQuery>(&(pos + len))
.unwrap()
.cursor,
);

for span in self.tree.iter_range(start..end) {
let start = span.start.unwrap_or(0);
let end = span.end.unwrap_or(span.elem.rle_len());
if end == 0 {
break;
}

if let RichtextStateChunk::Text(s) = span.elem {
match unicode_slice(&s.as_str(), start, end) {
Ok(x) => ans.push_str(&x),
Err(()) => return Err(LoroError::UTF16InUnicodeCodePoint { pos: pos + len }),
}
}
}

Ok(ans)
}

// PERF: can be split into two methods. One is without cursor_to_event_index
// PERF: can be sped up a lot by detecting whether the range is in a single leaf first
/// This is used to accept changes from DiffCalculator
Expand Down
76 changes: 75 additions & 1 deletion crates/loro-internal/src/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use std::{
sync::{Arc, Mutex, Weak},
};

use tracing::{debug, error, info, instrument, Event};
use tracing::{debug, error, info, instrument};

mod tree;
pub use tree::TreeHandler;
Expand Down Expand Up @@ -1363,6 +1363,80 @@ impl TextHandler {
}
}

/// Returns the character at `pos`.
///
/// `pos` is an Event Index:
///
/// - if feature="wasm", pos is a UTF-16 index
/// - if feature!="wasm", pos is a Unicode index
///
/// # Errors
///
/// Returns `LoroError::OutOfBound` when `pos >= self.len_event()` or when
/// the underlying state cannot produce a character for `pos`.
pub fn char_at(&self, pos: usize) -> LoroResult<char> {
    if pos >= self.len_event() {
        return Err(LoroError::OutOfBound {
            pos,
            len: self.len_event(),
            info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
        });
    }

    let found = match &self.inner {
        MaybeDetached::Detached(t) => {
            let t = t.try_lock().unwrap();
            t.value.get_char_by_event_index(pos)
        }
        MaybeDetached::Attached(a) => a.with_state(|state| {
            state
                .as_richtext_state_mut()
                .unwrap()
                .get_char_by_event_index(pos)
        }),
    };

    // The state-level lookup only signals failure with `()`; surface it to
    // callers as an out-of-bound access.
    found.map_err(|()| LoroError::OutOfBound {
        pos,
        len: self.len_event(),
        info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
    })
}

/// Returns the text in the range `start_index..end_index`.
///
/// `start_index` and `end_index` are Event Indexes:
///
/// - if feature="wasm", they are UTF-16 indexes
/// - if feature!="wasm", they are Unicode indexes
///
/// # Errors
///
/// Returns `LoroError::EndIndexLessThanStartIndex` when
/// `end_index < start_index`; otherwise forwards any error produced by the
/// underlying state's `get_text_slice_by_event_index`.
pub fn slice(&self, start_index: usize, end_index: usize) -> LoroResult<String> {
    // `checked_sub` yields `None` exactly when `end_index < start_index`.
    let Some(len) = end_index.checked_sub(start_index) else {
        return Err(LoroError::EndIndexLessThanStartIndex {
            start: start_index,
            end: end_index,
        });
    };

    match &self.inner {
        MaybeDetached::Detached(detached) => {
            let guard = detached.try_lock().unwrap();
            guard.value.get_text_slice_by_event_index(start_index, len)
        }
        MaybeDetached::Attached(attached) => attached.with_state(|state| {
            let richtext = state.as_richtext_state_mut().unwrap();
            richtext.get_text_slice_by_event_index(start_index, len)
        }),
    }
}

/// `pos` is a Event Index:
///
/// - if feature="wasm", pos is a UTF-16 index
/// - if feature!="wasm", pos is a Unicode index
///
/// This method requires auto_commit to be enabled.
pub fn splice(&self, pos: usize, len: usize, s: &str) -> LoroResult<String> {
let x = self.slice(pos, pos + len)?;
self.delete(pos, len)?;
self.insert(pos, s)?;
Ok(x)
}

/// `pos` is an Event Index:
///
/// - if feature="wasm", pos is a UTF-16 index
Expand Down
12 changes: 12 additions & 0 deletions crates/loro-internal/src/state/richtext_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ impl RichtextState {
}
}

/// Returns the text slice described by `pos` and `len`.
///
/// Thin forwarder: obtains the inner state through `self.state.get_mut()`
/// and calls its `get_text_slice_by_event_index` with the same arguments,
/// returning that result unchanged.
pub(crate) fn get_text_slice_by_event_index(
&mut self,
pos: usize,
len: usize,
) -> LoroResult<String> {
self.state.get_mut().get_text_slice_by_event_index(pos, len)
}

/// Returns the character at `pos`.
///
/// Thin forwarder: obtains the inner state through `self.state.get_mut()`
/// and calls its `get_char_by_event_index`, returning that result unchanged
/// (`Err(())` signals that no character could be produced).
pub(crate) fn get_char_by_event_index(&mut self, pos: usize) -> Result<char, ()> {
self.state.get_mut().get_char_by_event_index(pos)
}

pub(crate) fn iter(&mut self, mut callback: impl FnMut(&str) -> bool) -> () {
for span in self.state.get_mut().iter() {
if !callback(span.text.as_str()) {
Expand Down
97 changes: 97 additions & 0 deletions crates/loro-internal/tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,103 @@ fn test_delete_utf8_panic_out_bound_len() {
}

#[test]
fn test_char_at() {
    let doc = LoroDoc::new_auto_commit();
    let text = doc.get_text("text");
    text.insert(0, "Herld").unwrap();
    text.insert(2, "llo Wo").unwrap();
    // The two inserts combine into "Hello World"; check its first four chars.
    for (index, expected) in "Hell".chars().enumerate() {
        assert_eq!(text.char_at(index).unwrap(), expected);
    }
    // Index 15 is past the end of the text.
    let out_of_range = text.char_at(15).unwrap_err();
    assert!(matches!(
        out_of_range,
        loro_common::LoroError::OutOfBound { .. }
    ))
}

#[test]
fn test_char_at_detached() {
    let text = TextHandler::new_detached();
    text.insert(0, "Herld").unwrap();
    text.insert(2, "llo Wo").unwrap();
    // The two inserts combine into "Hello World"; check its first four chars.
    for (index, expected) in "Hell".chars().enumerate() {
        assert_eq!(text.char_at(index).unwrap(), expected);
    }
    // Index 15 is past the end of the text.
    let out_of_range = text.char_at(15).unwrap_err();
    assert!(matches!(
        out_of_range,
        loro_common::LoroError::OutOfBound { .. }
    ))
}

#[test]
fn test_char_at_wchar() {
    let doc = LoroDoc::new_auto_commit();
    let text = doc.get_text("text");
    text.insert(0, "你好").unwrap();
    text.insert(1, "世界").unwrap();
    // Inserting "世界" at index 1 yields "你世界好".
    for (index, expected) in "你世界好".chars().enumerate() {
        assert_eq!(text.char_at(index).unwrap(), expected);
    }
    // Index 5 is past the end of the four-character text.
    let out_of_range = text.char_at(5).unwrap_err();
    assert!(matches!(
        out_of_range,
        loro_common::LoroError::OutOfBound { .. }
    ))
}

#[test]
fn test_text_slice() {
    let doc = LoroDoc::new_auto_commit();
    let text = doc.get_text("text");
    text.insert(0, "Hello").unwrap();
    text.insert(1, "World").unwrap();
    // The text is now "HWorldello".
    let cases = [((0, 4), "HWor"), ((0, 1), "H")];
    for ((start, end), expected) in cases {
        assert_eq!(text.slice(start, end).unwrap(), expected);
    }
}

#[test]
fn test_text_slice_detached() {
    let text = TextHandler::new_detached();
    text.insert(0, "Herld").unwrap();
    text.insert(2, "llo Wo").unwrap();
    // The text is now "Hello World".
    let cases = [((0, 4), "Hell"), ((0, 1), "H")];
    for ((start, end), expected) in cases {
        assert_eq!(text.slice(start, end).unwrap(), expected);
    }
}

#[test]
fn test_text_slice_wchar() {
    let doc = LoroDoc::new_auto_commit();
    let text = doc.get_text("text");
    text.insert(0, "你好").unwrap();
    text.insert(1, "世界").unwrap();
    // The text is now "你世界好"; take its first three characters.
    let first_three = text.slice(0, 3).unwrap();
    assert_eq!(first_three, "你世界");
}

#[test]
fn test_text_slice_end_index_less_than_start() {
    let doc = LoroDoc::new_auto_commit();
    let text = doc.get_text("text");
    text.insert(0, "你好").unwrap();
    text.insert(1, "世界").unwrap();
    // Assert the specific error rather than "anything panicked", so a failure
    // in the setup above cannot make this test pass by accident.
    let err = text.slice(2, 1).unwrap_err();
    assert!(matches!(
        err,
        loro_common::LoroError::EndIndexLessThanStartIndex { .. }
    ));
}

#[test]
fn test_text_slice_out_of_bound() {
    let doc = LoroDoc::new_auto_commit();
    let text = doc.get_text("text");
    text.insert(0, "你好").unwrap();
    text.insert(1, "世界").unwrap();
    // Assert the specific error rather than "anything panicked", so a failure
    // in the setup above cannot make this test pass by accident.
    let err = text.slice(1, 10).unwrap_err();
    assert!(matches!(err, loro_common::LoroError::OutOfBound { .. }));
}

#[test]
fn test_text_splice() {
    let doc = LoroDoc::new_auto_commit();
    let text = doc.get_text("text");
    text.insert(0, "你好").unwrap();
    // Replace the second character with "世界"; the removed text is returned.
    let removed = text.splice(1, 1, "世界").unwrap();
    assert_eq!(removed, "好");
    assert_eq!(text.to_string(), "你世界");
}

fn test_text_iter() {
let mut str = String::new();
let doc = LoroDoc::new_auto_commit();
Expand Down
55 changes: 55 additions & 0 deletions crates/loro-wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1537,6 +1537,61 @@ impl LoroText {
Ok(())
}

/// Get a string slice.
///
/// Errors reported by the underlying handler are converted into the
/// error type of `JsResult` and returned.
///
/// @example
/// ```ts
/// import { Loro } from "loro-crdt";
///
/// const doc = new Loro();
/// const text = doc.getText("text");
/// text.insert(0, "Hello");
/// text.slice(0, 2); // "He"
/// ```
pub fn slice(&mut self, start_index: usize, end_index: usize) -> JsResult<String> {
    self.handler
        .slice(start_index, end_index)
        .map_err(Into::into)
}

/// Get the character at the given position.
///
/// Errors reported by the underlying handler are converted into the
/// error type of `JsResult` and returned.
///
/// @example
/// ```ts
/// import { Loro } from "loro-crdt";
///
/// const doc = new Loro();
/// const text = doc.getText("text");
/// text.insert(0, "Hello");
/// text.charAt(0); // "H"
/// ```
#[wasm_bindgen(js_name = "charAt")]
pub fn char_at(&mut self, pos: usize) -> JsResult<char> {
    self.handler.char_at(pos).map_err(Into::into)
}

/// Delete and return the string at the given range and insert a string at the same position.
///
/// Errors reported by the underlying handler are converted into the
/// error type of `JsResult` and returned.
///
/// @example
/// ```ts
/// import { Loro } from "loro-crdt";
///
/// const doc = new Loro();
/// const text = doc.getText("text");
/// text.insert(0, "Hello");
/// text.splice(2, 3, "llo"); // "llo"
/// ```
pub fn splice(&mut self, pos: usize, len: usize, s: &str) -> JsResult<String> {
    self.handler.splice(pos, len, s).map_err(Into::into)
}

/// Insert some string at utf-8 index.
///
/// @example
Expand Down
Loading

0 comments on commit c710ec3

Please sign in to comment.