From 0195119a58b316ce8c3bb55363cfb46c52601a83 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 8 Jul 2025 17:57:30 +0800 Subject: [PATCH] refactor: use view primitives --- builtin/arrayview.mbt | 52 ++++++++------ builtin/uninitialized_array.mbt | 6 +- bytes/view.mbt | 1 + string/deprecated.mbt | 16 +---- string/view.mbt | 120 +++++++++++++++++++------------- 5 files changed, 110 insertions(+), 85 deletions(-) diff --git a/builtin/arrayview.mbt b/builtin/arrayview.mbt index bd756e141..d86f9d5db 100644 --- a/builtin/arrayview.mbt +++ b/builtin/arrayview.mbt @@ -25,11 +25,23 @@ /// ``` #deprecated("use @array.View instead") #builtin.valtype -struct ArrayView[T] { - buf : UninitializedArray[T] - start : Int - len : Int -} +type ArrayView[T] + +///| +fn[T] ArrayView::buf(self : ArrayView[T]) -> UninitializedArray[T] = "%arrayview.buf" + +///| +fn[T] ArrayView::start(self : ArrayView[T]) -> Int = "%arrayview.start" + +///| +fn[T] ArrayView::len(self : ArrayView[T]) -> Int = "%arrayview.len" + +///| +fn[T] ArrayView::make( + buf : UninitializedArray[T], + start : Int, + len : Int, +) -> ArrayView[T] = "%arrayview.make" ///| /// Returns the length (number of elements) of an array view. @@ -48,7 +60,7 @@ struct ArrayView[T] { /// inspect(view.length(), content="2") /// ``` pub fn[T] ArrayView::length(self : ArrayView[T]) -> Int { - self.len + self.len() } ///| @@ -74,12 +86,12 @@ pub fn[T] ArrayView::length(self : ArrayView[T]) -> Int { /// inspect(view[1], content="4") /// ``` pub fn[T] ArrayView::op_get(self : ArrayView[T], index : Int) -> T { - guard index >= 0 && index < self.len else { + guard index >= 0 && index < self.len() else { abort( - "index out of bounds: the len is from 0 to \{self.len} but the index is \{index}", + "index out of bounds: the len is from 0 to \{self.len()} but the index is \{index}", ) } - self.buf[self.start + index] + self.buf()[self.start() + index] } ///| @@ -104,7 +116,7 @@ pub fn[T] ArrayView::op_get(self : ArrayView[T], index : Int) -> T { #intrinsic("%arrayview.unsafe_get") #internal(unsafe, "Panic if index is out of bounds") pub fn[T] ArrayView::unsafe_get(self : ArrayView[T], index : Int) -> T { - self.buf[self.start + index] + self.buf()[self.start() + index] } ///| @@ -133,12 +145,12 @@ pub fn[T] ArrayView::op_set( index : Int, value : T, ) -> Unit { - guard index >= 0 && index < self.len else { + guard index >= 0 && index < self.len() else { abort( - "index out of bounds: the len is from 0 to \{self.len} but the index is \{index}", + "index out of bounds: the len is from 0 to \{self.len()} but the index is \{index}", ) } - self.buf[self.start + index] = value + self.buf()[self.start() + index] = value } ///| @@ -163,14 +175,14 @@ pub fn[T] ArrayView::op_set( /// ``` /// pub fn[T] ArrayView::swap(self : ArrayView[T], i : Int, j : Int) -> Unit { - guard i >= 0 && i < self.len && j >= 0 && j < self.len else { + guard i >= 0 && i < self.len() && j >= 0 && j < self.len() else { abort( - "index out of bounds: the len is from 0 to \{self.len} but the index is (\{i}, \{j})", + "index out of bounds: the len is from 0 to \{self.len()} but the index is (\{i}, \{j})", ) } - let temp = self.buf[self.start + i] - self.buf[self.start + i] = self.buf[self.start + j] - self.buf[self.start + j] = temp + let temp = self.buf()[self.start() + i] + self.buf()[self.start() + i] = self.buf()[self.start() + j] + self.buf()[self.start() + j] = temp } ///| @@ -212,7 +224,7 @@ pub fn[T] Array::op_as_view( guard start >= 0 && start <= end && end <= len else { abort("View index out of bounds") } - ArrayView::{ buf: self.buffer(), start, len: end - start } + ArrayView::make(self.buffer(), start, end - start) } ///| @@ -258,5 +270,5 @@ pub fn[T] ArrayView::op_as_view( guard start >= 0 && start <= end && end <= len else { abort("View index out of bounds") } - ArrayView::{ buf: self.buf, start: self.start + start, len: end - start } + ArrayView::make(self.buf(), self.start() + start, end - start) } diff --git a/builtin/uninitialized_array.mbt b/builtin/uninitialized_array.mbt index a90e5d8cd..e9084b861 100644 --- a/builtin/uninitialized_array.mbt +++ b/builtin/uninitialized_array.mbt @@ -81,7 +81,7 @@ pub fn[T] UninitializedArray::op_as_view( guard start >= 0 && start <= end && end <= len else { abort("View start index out of bounds") } - { buf: self, start, len: end - start } + ArrayView::make(self, start, end - start) } ///| @@ -113,8 +113,8 @@ pub fn[T] UninitializedArray::unsafe_blit( test "op_as_view with valid_range" { let arr : UninitializedArray[Int] = UninitializedArray::make(5) let view = arr[1:4] - inspect(view.start, content="1") - inspect(view.len, content="3") + inspect(view.start(), content="1") + inspect(view.len(), content="3") } ///| diff --git a/bytes/view.mbt b/bytes/view.mbt index d76a08dec..cd4fc5185 100644 --- a/bytes/view.mbt +++ b/bytes/view.mbt @@ -25,6 +25,7 @@ /// assert_eq(bv[1], b'\x02') /// assert_eq(bv[2], b'\x03') /// ``` +#builtin.valtype type View ///| diff --git a/string/deprecated.mbt b/string/deprecated.mbt index 9e0ed40dc..55b00dbb9 100644 --- a/string/deprecated.mbt +++ b/string/deprecated.mbt @@ -112,19 +112,7 @@ pub fn ends_with(self : String, str : String) -> Bool { /// multi-byte characters correctly. #deprecated("use @string.View instead") #builtin.valtype -struct StringView { - // # Fields - // - // - `str`: The source String being viewed - // - `start`: Starting UTF-16 code unit index into the string - // - `end`: Ending UTF-16 code unit index into the string (not included) - // - // `len` is not included because it will make the operation of `op_as_view` - // has complexity O(n) where n is the length of the code points in the view. - str : String - start : Int - end : Int -} +type StringView ///| /// Returns the charcode(code unit) at the given index. @@ -135,7 +123,7 @@ pub fn View::charcode_at(self : View, index : Int) -> Int { guard index >= 0 && index < self.length() else { abort("Index out of bounds") } - self.str.unsafe_charcode_at(self.start + index) + self.str().unsafe_charcode_at(self.start() + index) } ///| diff --git a/string/view.mbt b/string/view.mbt index 8a623e2fb..e6cc1cb92 100644 --- a/string/view.mbt +++ b/string/view.mbt @@ -18,6 +18,21 @@ /// multi-byte characters correctly. pub typealias StringView as View +///| +/// Returns the source string being viewed. +fn View::str(self : View) -> String = "%stringview.str" + +///| +/// Returns the starting UTF-16 code unit index into the string. +fn View::start(self : View) -> Int = "%stringview.start" + +///| +/// Returns the ending UTF-16 code unit index into the string (not included). +fn View::end(self : View) -> Int = "%stringview.end" + +///| +fn View::make_view(str : String, start : Int, end : Int) -> View = "%stringview.make" + ///| /// Returns the charcode(UTF-16 code unit) at the given index. /// @@ -35,20 +50,20 @@ pub fn View::op_get(self : View, index : Int) -> Int { guard index >= 0 && index < self.length() else { abort("Index out of bounds") } - self.str.unsafe_charcode_at(self.start + index) + self.str().unsafe_charcode_at(self.start() + index) } ///| /// Returns the original string that is being viewed. pub fn data(self : View) -> String { - self.str + self.str() } ///| /// Returns the starting offset (in UTF-16 code units) of this view into its /// underlying string. pub fn start_offset(self : View) -> Int { - self.start + self.start() } ///| @@ -56,7 +71,7 @@ pub fn start_offset(self : View) -> Int { /// /// This method counts the charcodes(code unit) in the view and has O(1) complexity. pub fn length(self : View) -> Int { - self.end - self.start + self.end() - self.start() } ///| @@ -88,7 +103,7 @@ pub fn String::view( end_offset <= self.length() else { abort("Invalid index for View") } - { str: self, start: start_offset, end: end_offset } + View::make_view(self, start_offset, end_offset) } ///| @@ -104,11 +119,11 @@ pub fn View::view( end_offset <= self.length() else { abort("Invalid index for View") } - { - str: self.str, - start: self.start + start_offset, - end: self.start + end_offset, - } + View::make_view( + self.str(), + self.start() + start_offset, + self.start() + end_offset, + ) } ///| @@ -158,13 +173,11 @@ pub fn View::charcodes(self : View, start~ : Int = 0, end? : Int) -> View { /// the view. If i is negative, it returns the index of the (n + i)-th character /// where n is the total number of Unicode characters in the view. pub fn View::offset_of_nth_char(self : View, i : Int) -> Int? { - if self.str.offset_of_nth_char( - i, - start_offset=self.start, - end_offset=self.end, - ) + if self + .str() + .offset_of_nth_char(i, start_offset=self.start(), end_offset=self.end()) is Some(index) { - Some(index - self.start) + Some(index - self.start()) } else { None } @@ -176,7 +189,7 @@ pub fn View::offset_of_nth_char(self : View, i : Int) -> Int? { /// /// This method has O(1) complexity. pub fn View::unsafe_charcode_at(self : View, index : Int) -> Int { - self.str.unsafe_charcode_at(self.start + index) + self.str().unsafe_charcode_at(self.start() + index) } ///| @@ -185,7 +198,7 @@ pub fn View::unsafe_charcode_at(self : View, index : Int) -> Int { /// Note this has O(n) complexity where n is the length of the code points in /// the view. pub fn View::char_length(self : View) -> Int { - self.str.char_length(start_offset=self.start, end_offset=self.end) + self.str().char_length(start_offset=self.start(), end_offset=self.end()) } ///| @@ -193,7 +206,9 @@ pub fn View::char_length(self : View) -> Int { /// /// This has O(n) complexity where n is the length in the parameter. pub fn View::char_length_eq(self : View, len : Int) -> Bool { - self.str.char_length_eq(len, start_offset=self.start, end_offset=self.end) + self + .str() + .char_length_eq(len, start_offset=self.start(), end_offset=self.end()) } ///| @@ -201,12 +216,14 @@ pub fn View::char_length_eq(self : View, len : Int) -> Bool { /// /// This has O(n) complexity where n is the length in the parameter. pub fn View::char_length_ge(self : View, len : Int) -> Bool { - self.str.char_length_ge(len, start_offset=self.start, end_offset=self.end) + self + .str() + .char_length_ge(len, start_offset=self.start(), end_offset=self.end()) } ///| pub impl Show for View with output(self, logger) { - let substr = self.str.substring(start=self.start, end=self.end) + let substr = self.str().substring(start=self.start(), end=self.end()) String::output(substr, logger) } @@ -221,16 +238,16 @@ pub impl Show for View with output(self, logger) { /// inspect(view.to_string(), content="Hello") /// ``` pub impl Show for StringView with to_string(self) { - self.str.substring(start=self.start, end=self.end) + self.str().substring(start=self.start(), end=self.end()) } ///| /// Returns an iterator over the Unicode characters in the string view. pub fn View::iter(self : View) -> Iter[Char] { - Iter::new(yield_ => for index in self.start.. for index in self.start().. Iter2[Int, Char] { Iter2::new(yield_ => { let len = self.length() for index = 0, n = 0; index < len; index = index + 1, n = n + 1 { - let c1 = self.str.unsafe_charcode_at(self.start + index) + let c1 = self.str().unsafe_charcode_at(self.start() + index) if is_leading_surrogate(c1) && index + 1 < len { - let c2 = self.str.unsafe_charcode_at(self.start + index + 1) + let c2 = self.str().unsafe_charcode_at(self.start() + index + 1) if is_trailing_surrogate(c2) { let c = code_point_of_surrogate_pair(c1, c2) guard yield_(n, c) is IterContinue else { break IterEnd } @@ -269,12 +286,12 @@ pub fn View::iter2(self : View) -> Iter2[Int, Char] { ///| /// Returns an iterator over the Unicode characters in the string view in reverse order. pub fn View::rev_iter(self : View) -> Iter[Char] { - Iter::new(yield_ => for index = self.end - 1 - index >= self.start + Iter::new(yield_ => for index = self.end() - 1 + index >= self.start() index = index - 1 { - let c1 = self.str.unsafe_charcode_at(index) + let c1 = self.str().unsafe_charcode_at(index) if is_trailing_surrogate(c1) && index - 1 >= 0 { - let c2 = self.str.unsafe_charcode_at(index - 1) + let c2 = self.str().unsafe_charcode_at(index - 1) if is_leading_surrogate(c2) { let c = code_point_of_surrogate_pair(c2, c1) guard yield_(c) is IterContinue else { break IterEnd } @@ -293,12 +310,12 @@ pub fn View::rev_iter(self : View) -> Iter[Char] { pub impl Eq for View with op_equal(self, other) { let len = self.length() guard len == other.length() else { return false } - if physical_equal(self.str, other.str) && self.start == other.start { + if physical_equal(self.str(), other.str()) && self.start() == other.start() { return true } for i in 0.. View raise CreatingViewError { - let str_len = self.str.length() + let str_len = self.str().length() // Calculate absolute positions in the original string let abs_end = match end { - None => self.end - Some(end) => if end < 0 { self.end + end } else { self.start + end } + None => self.end() + Some(end) => if end < 0 { self.end() + end } else { self.start() + end } + } + let abs_start = if start < 0 { + self.end() + start + } else { + self.start() + start } - let abs_start = if start < 0 { self.end + start } else { self.start + start } // Validate bounds against the original string - guard abs_start >= self.start && abs_start <= abs_end && abs_end <= self.end else { + guard abs_start >= self.start() && + abs_start <= abs_end && + abs_end <= self.end() else { raise IndexOutOfBounds } // Check for surrogate pair boundaries if abs_start < str_len && - is_trailing_surrogate(self.str.unsafe_charcode_at(abs_start)) { + is_trailing_surrogate(self.str().unsafe_charcode_at(abs_start)) { raise InvalidIndex } if abs_end < str_len && - is_trailing_surrogate(self.str.unsafe_charcode_at(abs_end)) { + is_trailing_surrogate(self.str().unsafe_charcode_at(abs_end)) { raise InvalidIndex } - { str: self.str, start: abs_start, end: abs_end } + View::make_view(self.str(), abs_start, abs_end) }