Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 27 additions & 26 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 20 additions & 10 deletions crates/vm/src/anystr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ use crate::{
convert::TryFromBorrowedObject,
function::OptionalOption,
};
use icu_properties::{
CodePointSetData,
props::{Alphabetic, ChangesWhenLowercased, ChangesWhenUppercased},
};
use num_traits::{cast::ToPrimitive, sign::Signed};

use core::ops::Range;
Expand Down Expand Up @@ -130,8 +134,6 @@ where
}

pub trait AnyChar: Copy {
fn is_lowercase(self) -> bool;
fn is_uppercase(self) -> bool;
fn bytes_len(self) -> usize;
}

Expand Down Expand Up @@ -407,12 +409,16 @@ pub trait AnyStr {
// _Py_bytes_islower
// unicode_islower_impl
fn py_islower(&self) -> bool {
let case_change = CodePointSetData::new::<ChangesWhenLowercased>();
let alphabetic = CodePointSetData::new::<Alphabetic>();
let mut lower = false;
for c in self.elements() {
if c.is_uppercase() {
for chunk in self.as_bytes().utf8_chunks().map(|c| c.valid()) {
if chunk.chars().any(|c| case_change.contains(c)) {
return false;
} else if !lower && c.is_lowercase() {
lower = true
}

if !lower && chunk.chars().any(|c| alphabetic.contains(c)) {
lower = true;
}
}
lower
Expand All @@ -422,12 +428,16 @@ pub trait AnyStr {
// Py_bytes_isupper
// unicode_isupper_impl
fn py_isupper(&self) -> bool {
let case_change = CodePointSetData::new::<ChangesWhenUppercased>();
let alphabetic = CodePointSetData::new::<Alphabetic>();
let mut upper = false;
for c in self.elements() {
if c.is_lowercase() {
for chunk in self.as_bytes().utf8_chunks().map(|c| c.valid()) {
if chunk.chars().any(|c| case_change.contains(c)) {
return false;
} else if !upper && c.is_uppercase() {
upper = true
}

if !upper && chunk.chars().any(|c| alphabetic.contains(c)) {
upper = true;
}
}
upper
Expand Down
22 changes: 0 additions & 22 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2228,14 +2228,6 @@ impl AnyStrContainer<str> for String {
}

impl anystr::AnyChar for char {
fn is_lowercase(self) -> bool {
self.is_lowercase()
}

fn is_uppercase(self) -> bool {
self.is_uppercase()
}

fn bytes_len(self) -> usize {
self.len_utf8()
}
Expand Down Expand Up @@ -2341,12 +2333,6 @@ impl AnyStrContainer<Wtf8> for Wtf8Buf {
}

impl anystr::AnyChar for CodePoint {
fn is_lowercase(self) -> bool {
self.is_char_and(char::is_lowercase)
}
fn is_uppercase(self) -> bool {
self.is_char_and(char::is_uppercase)
}
fn bytes_len(self) -> usize {
self.len_wtf8()
}
Expand Down Expand Up @@ -2459,14 +2445,6 @@ impl AnyStrContainer<AsciiStr> for AsciiString {
}

impl anystr::AnyChar for ascii::AsciiChar {
fn is_lowercase(self) -> bool {
self.is_lowercase()
}

fn is_uppercase(self) -> bool {
self.is_uppercase()
}

fn bytes_len(self) -> usize {
1
}
Expand Down
8 changes: 0 additions & 8 deletions crates/vm/src/bytes_inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1031,14 +1031,6 @@ impl AnyStrContainer<[u8]> for Vec<u8> {
const ASCII_WHITESPACES: [u8; 6] = [0x20, 0x09, 0x0a, 0x0c, 0x0d, 0x0b];

impl anystr::AnyChar for u8 {
fn is_lowercase(self) -> bool {
self.is_ascii_lowercase()
}

fn is_uppercase(self) -> bool {
self.is_ascii_uppercase()
}

fn bytes_len(self) -> usize {
1
}
Expand Down
12 changes: 12 additions & 0 deletions extra_tests/snippets/builtin_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
assert not a.isdecimal()
assert not a.isnumeric()
assert a.istitle()
assert "\u1c89".istitle()
# assert "DZ".title() == "Dz"
assert a.isalpha()

s = "1 2 3"
Expand Down Expand Up @@ -220,6 +222,14 @@
assert "abc\t12345\txyz".expandtabs() == "abc 12345 xyz"
assert "-".join(["1", "2", "3"]) == "1-2-3"
assert "HALLO".isupper()
assert not "123".isupper()
assert not "123".islower()
assert not "\U0001f431".isupper()
assert not "\U0001f431".islower()
assert "\U0001f431 CAT".isupper()
assert "\U0001f431 cat".islower()
assert "\u0295".islower()
assert "\u1c89".isupper()
Copy link
Copy Markdown
Contributor

@ShaharNaveh ShaharNaveh Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If CPython doesn't have test cases that cover these edge cases, I think it would be worth trying to get them added upstream.

assert "hello, my name is".partition("my ") == ("hello, ", "my ", "name is")
assert "hello".partition("is") == ("hello", "", "")
assert "hello, my name is".rpartition("is") == ("hello, my name ", "is", "")
Expand All @@ -236,6 +246,8 @@
assert not "123".isidentifier()

assert "Σίσυφος".swapcase() == "σΊΣΥΦΟΣ"
assert "\u0295".swapcase() == "\u0295"
assert "\u1c89".swapcase() == "\u1c8a"

# String Formatting
assert "{} {}".format(1, 2) == "1 2"
Expand Down
Loading