From 3de138ed242d4ef7aa1dce91161e9297f6a016bf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 25 Mar 2026 09:35:22 +0100 Subject: [PATCH] gh-146385: Switch back to re to detect shlex.quote slow path Commit 06a26fda60 ("gh-118761: Optimise import time for ``shlex`` (#132036)") made shlex.quote() slower when the input has to be quoted. This is because the regular expression search was able to short-circuit at the first unsafe character. Go back to the same algorithm as 3.13, but make the "import re" and compilation of the regular expression lazy. Testing s.isascii() makes shlex.quote() twice as fast in the non-ASCII case, but costs up to 25% of the full run time (because it necessitates an earlier isinstance check) if the string *is* ASCII. The latter is probably the common case, so drop the check. --- Lib/shlex.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/Lib/shlex.py b/Lib/shlex.py index 5959f52dd12639..31349b96afcc73 100644 --- a/Lib/shlex.py +++ b/Lib/shlex.py @@ -317,20 +317,22 @@ def join(split_command): return ' '.join(quote(arg) for arg in split_command) +def _find_unsafe_lazy(s): + import re # deferred import and compilation for performance + + global _find_unsafe + _find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search + return _find_unsafe(s) + + +_find_unsafe = _find_unsafe_lazy + + def quote(s): """Return a shell-escaped version of the string *s*.""" if not s: return "''" - - if not isinstance(s, str): - raise TypeError(f"expected string object, got {type(s).__name__!r}") - - # Use bytes.translate() for performance - safe_chars = (b'%+,-./0123456789:=@' - b'ABCDEFGHIJKLMNOPQRSTUVWXYZ_' - b'abcdefghijklmnopqrstuvwxyz') - # No quoting is needed if `s` is an ASCII string consisting only of `safe_chars` - if s.isascii() and not s.encode().translate(None, delete=safe_chars): + if _find_unsafe(s) is None: return s # use single quotes, and put single quotes into double quotes