Skip to content

[stdlib] Make String.makeContiguousUTF8() strictly true #82851

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions stdlib/public/core/SmallString.swift
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ internal struct _SmallString {
extension _SmallString {
@inlinable @inline(__always)
internal static var capacity: Int {
#if _pointerBitWidth(_32) && os(watchOS)
#if os(watchOS) && _pointerBitWidth(_32)
return 10
#elseif _pointerBitWidth(_32) || _pointerBitWidth(_16)
// Note: changed from 10 for contiguous storage.
Expand All @@ -95,7 +95,7 @@ extension _SmallString {

@_alwaysEmitIntoClient @inline(__always)
internal static func contiguousCapacity() -> Int {
#if _pointerBitWidth(_32) && os(watchOS)
#if os(watchOS) && _pointerBitWidth(_32)
return capacity &- 2
#else
return capacity
Expand Down
12 changes: 11 additions & 1 deletion stdlib/public/core/StringCreate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,19 @@ extension String {
@inline(never) // slow-path
internal static func _copying(_ str: Substring) -> String {
if _fastPath(str._wholeGuts.isFastUTF8) {
return unsafe str._wholeGuts.withFastUTF8(range: str._offsetRange) {
var new = unsafe str._wholeGuts.withFastUTF8(range: str._offsetRange) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In non-watchOS or 64-bit environments, will the compiler not complain about this var being unmutated?

Copy link
Contributor Author

@glessard glessard Jul 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't notice it complaining when building on macOS. Apparently it knows to consider statements that are conditionally skipped: https://swift.godbolt.org/z/aT1naWfce

unsafe String._uncheckedFromUTF8($0)
}
#if os(watchOS) && _pointerBitWidth(_32)
// Required for compatibility with some small strings that
// may be encoded in the 32-bit slice of watchOS binaries.
if str._wholeGuts.isSmall,
str._wholeGuts.count > _SmallString.contiguousCapacity() {
new.reserveCapacity(_SmallString.capacity + 1)
return new
}
#endif
return new
}
return unsafe Array(str.utf8).withUnsafeBufferPointer {
unsafe String._uncheckedFromUTF8($0)
Expand Down
35 changes: 24 additions & 11 deletions stdlib/public/core/StringProtocol.swift
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,27 @@ extension StringProtocol {

// Contiguous UTF-8 strings
extension String {
/// Returns whether this string is capable of providing access to
/// validly-encoded UTF-8 contents in contiguous memory in O(1) time.
/// Returns whether this string's storage contains
/// validly-encoded UTF-8 contents in contiguous memory.
///
/// Contiguous strings always operate in O(1) time for withUTF8 and always
/// give a result for String.UTF8View.withContiguousStorageIfAvailable.
/// Contiguous strings always operate in O(1) time for withUTF8, always give
/// a result for String.UTF8View.withContiguousStorageIfAvailable, and always
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this comment actually inaccurate before? Did String.UTF8View.withContiguousStorageIfAvailable actually NOT provide a result for these exceptional small strings?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was not lying: UTF8View.withContiguousStorageIfAvailable will rearrange the bytes of a small string in order to present contiguous storage to the closure.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. With this returning false now for these strings, some might find it surprising that the former conditions are still true. However, seems like we're within our rights to do so, as this is implication ("isContiguousUTF8 -> the above guarantees hold") and not equivalence ("isContiguousUTF8 <-> the above guarantees hold").

/// return a non-nil value from `String._utf8Span` and `String.UTF8View._span`.
/// Contiguous strings also benefit from fast-paths and better optimizations.
///
@_alwaysEmitIntoClient
public var isContiguousUTF8: Bool { return _guts.isFastUTF8 }
public var isContiguousUTF8: Bool {
if _guts.isFastUTF8 {
#if os(watchOS) && _pointerBitWidth(_32)
// Required for compatibility with some small strings that
// may be encoded in the 32-bit slice of watchOS binaries.
if _guts.isSmall && _guts.count > _SmallString.contiguousCapacity() {
return false
}
#endif
return true
}
return false
}

/// If this string is not contiguous, make it so. If this mutates the string,
/// it will invalidate any pre-existing indices.
Expand Down Expand Up @@ -223,13 +235,14 @@ extension String {

// Contiguous UTF-8 strings
extension Substring {
/// Returns whether this string is capable of providing access to
/// validly-encoded UTF-8 contents in contiguous memory in O(1) time.
/// Returns whether this string's storage contains
/// validly-encoded UTF-8 contents in contiguous memory.
///
/// Contiguous strings always operate in O(1) time for withUTF8 and always
/// give a result for String.UTF8View.withContiguousStorageIfAvailable.
/// Contiguous strings always operate in O(1) time for withUTF8, always give
/// a result for Substring.UTF8View.withContiguousStorageIfAvailable, and
/// always return a non-nil value from `Substring._utf8Span` and
/// `Substring.UTF8View._span`.
/// Contiguous strings also benefit from fast-paths and better optimizations.
///
@_alwaysEmitIntoClient
public var isContiguousUTF8: Bool { return self.base.isContiguousUTF8 }

Expand Down
67 changes: 67 additions & 0 deletions test/stdlib/Span/SmallStringCompatibility.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
//===--- SmallStringCompatibility.swift -----------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// RUN: %target-run-stdlib-swift

// REQUIRES: executable_test
// REQUIRES: CPU=wasm32 || CPU=arm64_32

import StdlibUnittest

var suite = TestSuite("SmallStringCompatibility")
defer { runAllTests() }

var strings = [
("Small", true),
("Less small", true),
("Positively large.", true),
]

#if os(watchOS)
strings[1].1 = false
#endif

strings.forEach { (string, contiguous) in
suite.test("Contiguous: \(string)")
.require(.stdlib_6_2).code {

expectEqual(string.isContiguousUTF8, contiguous)
}
}

strings.forEach { (string, contiguous) in
suite.test("Contiguous Substring: \(string)")
.require(.stdlib_6_2).code {
let substring = string[...]
expectEqual(substring.isContiguousUTF8, contiguous)
}
}

strings.forEach { (string, contiguous) in
suite.test("String.makeContiguousUTF8: \(string)")
.require(.stdlib_6_2).code {
var s = string
s.makeContiguousUTF8()
expectTrue(s.isContiguousUTF8)
expectEqual(s, string)
}
}

strings.forEach { (string, contiguous) in
suite.test("Substring.makeContiguousUTF8: \(string)")
.require(.stdlib_6_2).code {
var s: Substring = string.dropFirst().dropLast()
s.makeContiguousUTF8()
expectTrue(s.isContiguousUTF8)
expectEqual(s, string.dropFirst().dropLast())
}
}