rust-lang · Kmeakin · Aug 2, 2025 · Aug 9, 2025 · Aug 9, 2025 · Aug 10, 2025
diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs
@@ -418,9 +418,8 @@ impl str {
         }
 
         fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
-            use core::unicode::{Case_Ignorable, Cased};
-            match iter.skip_while(|&c| Case_Ignorable(c)).next() {
-                Some(c) => Cased(c),
+            match iter.skip_while(|&c| c.is_case_ignorable()).next() {
+                Some(c) => c.is_cased(),
                 None => false,
             }
         }

diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
@@ -6,7 +6,7 @@ use crate::slice;
 use crate::str::from_utf8_unchecked_mut;
 use crate::ub_checks::assert_unsafe_precondition;
 use crate::unicode::printable::is_printable;
-use crate::unicode::{self, conversions};
+use crate::unicode::{self, Case_Ignorable, conversions};
 
 impl char {
     /// The lowest valid code point a `char` can have, `'\0'`.
@@ -950,7 +950,11 @@ impl char {
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn is_control(self) -> bool {
-        unicode::Cc(self)
+        // According to
+        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
+        // the set of codepoints in `Cc` will never change. So we can hard-code
+        // the patterns to match against instead of using a table.
+        matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
     }
 
     /// Returns `true` if this `char` has the `Grapheme_Extend` property.
@@ -965,7 +969,47 @@ impl char {
     #[must_use]
     #[inline]
     pub(crate) fn is_grapheme_extended(self) -> bool {
-        unicode::Grapheme_Extend(self)
+        !self.is_ascii() && unicode::Grapheme_Extend(self)
+    }
+
+    /// Returns `true` if this `char` has the `Cased` derived property.
+    ///
+    /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
+    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
+    #[must_use]
+    #[inline]
+    #[doc(hidden)]
+    #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+    pub fn is_cased(self) -> bool {
+        if self.is_ascii() {
+            self.is_ascii_alphabetic()
+        } else {
+            unicode::Lowercase(self) || unicode::Uppercase(self) || unicode::Lt(self)
+        }
+    }
+
+    /// Returns `true` if this `char` has the `Case_Ignorable` property.
+    ///
+    /// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
+    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
+    #[must_use]
+    #[inline]
+    #[doc(hidden)]
+    #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+    pub fn is_case_ignorable(self) -> bool {
+        if self.is_ascii() {
+            matches!(self, '\'' | '.' | ':' | '^' | '`')
+        } else {
+            Case_Ignorable(self)
+        }
     }
 
     /// Returns `true` if this `char` has one of the general categories for numbers.

diff --git a/library/core/src/unicode/mod.rs b/library/core/src/unicode/mod.rs
@@ -3,15 +3,14 @@
 
 // for use in alloc, not re-exported in std.
 #[rustfmt::skip]
-pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
-pub use unicode_data::cased::lookup as Cased;
 pub use unicode_data::conversions;
 
 #[rustfmt::skip]
 pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
-pub(crate) use unicode_data::cc::lookup as Cc;
+pub(crate) use unicode_data::case_ignorable::lookup as Case_Ignorable;
 pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
 pub(crate) use unicode_data::lowercase::lookup as Lowercase;
+pub(crate) use unicode_data::lt::lookup as Lt;
 pub(crate) use unicode_data::n::lookup as N;
 pub(crate) use unicode_data::uppercase::lookup as Uppercase;
 pub(crate) use unicode_data::white_space::lookup as White_Space;

diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs
diff --git a/src/tools/unicode-table-generator/src/cascading_map.rs b/src/tools/unicode-table-generator/src/cascading_map.rs
diff --git a/src/tools/unicode-table-generator/src/case_mapping.rs b/src/tools/unicode-table-generator/src/case_mapping.rs
@@ -6,24 +6,26 @@ use crate::{UnicodeData, fmt_list};
 
 const INDEX_MASK: u32 = 1 << 22;
 
-pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
+pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [usize; 2]) {
     let mut file = String::new();
 
     write!(file, "const INDEX_MASK: u32 = 0x{INDEX_MASK:x};").unwrap();
     file.push_str("\n\n");
     file.push_str(HEADER.trim_start());
     file.push('\n');
-    file.push_str(&generate_tables("LOWER", &data.to_lower));
+    let (lower_tables, lower_size) = generate_tables("LOWER", &data.to_lower);
+    file.push_str(&lower_tables);
     file.push_str("\n\n");
-    file.push_str(&generate_tables("UPPER", &data.to_upper));
-    file
+    let (upper_tables, upper_size) = generate_tables("UPPER", &data.to_upper);
+    file.push_str(&upper_tables);
+    (file, [lower_size, upper_size])
 }
 
-fn generate_tables(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> String {
+fn generate_tables(case: &str, data: &BTreeMap<u32, [u32; 3]>) -> (String, usize) {
     let mut mappings = Vec::with_capacity(data.len());
     let mut multis = Vec::new();
 
-    for (&key, &(a, b, c)) in data.iter() {
+    for (&key, &[a, b, c]) in data.iter() {
         let key = char::from_u32(key).unwrap();
 
         if key.is_ascii() {
@@ -46,16 +48,31 @@ fn generate_tables(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> String
     }
 
     let mut tables = String::new();
-
-    write!(tables, "static {}CASE_TABLE: &[(char, u32)] = &[{}];", case, fmt_list(mappings))
-        .unwrap();
+    let mut size = 0;
+
+    size += size_of_val(mappings.as_slice());
+    write!(
+        tables,
+        "static {}CASE_TABLE: &[(char, u32); {}] = &[{}];",
+        case,
+        mappings.len(),
+        fmt_list(mappings),
+    )
+    .unwrap();
 
     tables.push_str("\n\n");
 
-    write!(tables, "static {}CASE_TABLE_MULTI: &[[char; 3]] = &[{}];", case, fmt_list(multis))
-        .unwrap();
-
-    tables
+    size += size_of_val(multis.as_slice());
+    write!(
+        tables,
+        "static {}CASE_TABLE_MULTI: &[[char; 3]; {}] = &[{}];",
+        case,
+        multis.len(),
+        fmt_list(multis),
+    )
+    .unwrap();
+
+    (tables, size)
 }
 
 struct CharEscape(char);