From 043cb50e527926cd88dc3b6e2b5b0f6c90a89e81 Mon Sep 17 00:00:00 2001 From: Joel Sallow <32407840+vexx32@users.noreply.github.com> Date: Wed, 12 Feb 2020 23:39:42 -0500 Subject: [PATCH 1/4] :bug: Allow signed types to parse hex properly See #11823 for context and discussion. This change allows shorter hex literals to be treated as signed, if the specified type suffix is a signed type with correct hex lengths. For example, 0xFFFFs now parses correctly to -1 instead of erroring. --- .../engine/parser/tokenizer.cs | 76 +++++++++++++++---- .../Language/Parser/Parser.Tests.ps1 | 9 ++- 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/src/System.Management.Automation/engine/parser/tokenizer.cs b/src/System.Management.Automation/engine/parser/tokenizer.cs index e11d7f8de7d..c5727b75031 100644 --- a/src/System.Management.Automation/engine/parser/tokenizer.cs +++ b/src/System.Management.Automation/engine/parser/tokenizer.cs @@ -1460,17 +1460,28 @@ private char Backtick(char c, out char surrogateCharacter) switch (c) { - case '0': return '\0'; - case 'a': return '\a'; - case 'b': return '\b'; - case 'e': return '\u001b'; - case 'f': return '\f'; - case 'n': return '\n'; - case 'r': return '\r'; - case 't': return '\t'; - case 'u': return ScanUnicodeEscape(out surrogateCharacter); - case 'v': return '\v'; - default: return c; + case '0': + return '\0'; + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'e': + return '\u001b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'u': + return ScanUnicodeEscape(out surrogateCharacter); + case 'v': + return '\v'; + default: + return c; } } @@ -3557,8 +3568,9 @@ private static bool TryGetNumberValue( { try { - NumberStyles style = NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | - NumberStyles.AllowExponent; + NumberStyles style = NumberStyles.AllowLeadingSign + | NumberStyles.AllowDecimalPoint + | NumberStyles.AllowExponent; if (real) { @@ -3685,9 +3697,40 @@ private static bool TryGetNumberValue( } // If we're expecting a sign bit, remove the leading 0 added in ScanNumberHelper - if (!suffix.HasFlag(NumberSuffixFlags.Unsigned) && ((strNum.Length - 1) & 7) == 0) + if (!suffix.HasFlag(NumberSuffixFlags.Unsigned)) { - strNum = strNum.Slice(1); + var expectedLength = suffix switch + { + NumberSuffixFlags.SignedByte => 2, + NumberSuffixFlags.Short => 4, + NumberSuffixFlags.Long => 16, + // No suffix flag can mean int or long depending on input string length + _ => strNum.Length < 16 ? 8 : 16 + }; + + if (strNum.Length == expectedLength) + { + strNum = strNum.Slice(1); + } + } + + + // If we're expecting a sign bit, remove the leading 0 added in ScanNumberHelper + if (!suffix.HasFlag(NumberSuffixFlags.Unsigned)) + { + var expectedLength = suffix switch + { + NumberSuffixFlags.SignedByte => 2, + NumberSuffixFlags.Short => 4, + NumberSuffixFlags.Long => 16, + // No suffix flag can mean int or long depending on input string length + _ => strNum.Length < 16 ? 8 : 16 + }; + + if (strNum.Length == expectedLength) + { + strNum = strNum.Slice(1); + } } style = NumberStyles.AllowHexSpecifier; @@ -4988,7 +5031,8 @@ internal Token NextToken() _currentIndex = _tokenStart; c = GetChar(); - if (strNum == null) { return ScanGenericToken(c); } + if (strNum == null) + { return ScanGenericToken(c); } } return NewToken(TokenKind.Exclaim); diff --git a/test/powershell/Language/Parser/Parser.Tests.ps1 b/test/powershell/Language/Parser/Parser.Tests.ps1 index 5105f9e9e03..eb5419a4440 100644 --- a/test/powershell/Language/Parser/Parser.Tests.ps1 +++ b/test/powershell/Language/Parser/Parser.Tests.ps1 @@ -703,7 +703,7 @@ foo``u{2195}abc if ( $IsLinux -or $IsMacOS ) { # because we execute on *nix based on executable bit, and the file name doesn't matter # so we can use the same filename as for windows, just make sure it's executable with chmod - "#!/bin/sh`necho ""Hello World""" | out-file -encoding ASCII $shellfile + "#!/bin/sh`necho ""Hello World""" | Out-File -encoding ASCII $shellfile /bin/chmod +x $shellfile } else { @@ -931,6 +931,7 @@ foo``u{2195}abc @{ Script = "0x0y"; ExpectedValue = "0"; ExpectedType = [sbyte] } @{ Script = "0x41y"; ExpectedValue = "65"; ExpectedType = [sbyte] } @{ Script = "-0x41y"; ExpectedValue = "-65"; ExpectedType = [sbyte] } + @{ Script = "0xFFy"; ExpectedValue = "-1"; ExpectedType = [sbyte] } #Binary @{ Script = "0b0y"; ExpectedValue = "0"; ExpectedType = [sbyte] } @{ Script = "0b10y"; ExpectedValue = "2"; ExpectedType = [sbyte] } @@ -957,6 +958,7 @@ foo``u{2195}abc @{ Script = "0x0s"; ExpectedValue = "0"; ExpectedType = [short] } @{ Script = "0x41s"; ExpectedValue = "65"; ExpectedType = [short] } @{ Script = "-0x41s"; ExpectedValue = "-65"; ExpectedType = [short] } + @{ Script = "0xFFFFs"; ExpectedValue = "-1"; ExpectedType = [short] } #Binary @{ Script = "0b0s"; ExpectedValue = "0"; ExpectedType = [short] } @{ Script = "0b10s"; ExpectedValue = "2"; ExpectedType = [short] } @@ -985,6 +987,7 @@ foo``u{2195}abc @{ Script = "0x0l"; ExpectedValue = "0"; ExpectedType = [long] } @{ Script = "0x41l"; ExpectedValue = "65"; ExpectedType = [long] } @{ Script = "-0x41l"; ExpectedValue = "-65"; ExpectedType = [long] } + @{ Script = "0xFFFFFFFFFFFFFFFFl"; ExpectedValue = "-1"; ExpectedType = [long] } #Binary @{ Script = "0b0l"; ExpectedValue = "0"; ExpectedType = [long] } @{ Script = "0b10l"; ExpectedValue = "2"; ExpectedType = [long] } @@ -1078,6 +1081,7 @@ foo``u{2195}abc #Hexadecimal @{ Script = "0x0uy"; ExpectedValue = "0"; ExpectedType = [byte] } @{ Script = "0x41uy"; ExpectedValue = "65"; ExpectedType = [byte] } + @{ Script = "0xFFuy"; ExpectedValue = [byte]::MaxValue; ExpectedType = [byte] } #Binary @{ Script = "0b0uy"; ExpectedValue = "0"; ExpectedType = [byte] } @{ Script = "0b10uy"; ExpectedValue = "2"; ExpectedType = [byte] } @@ -1098,6 +1102,8 @@ foo``u{2195}abc #Hexadecimal @{ Script = "0x0us"; ExpectedValue = "0"; ExpectedType = [ushort] } @{ Script = "0x41us"; ExpectedValue = "65"; ExpectedType = [ushort] } + @{ Script = "0x41us"; ExpectedValue = "65"; ExpectedType = [ushort] } + @{ Script = "0xFFFFus"; ExpectedValue = [ushort]::MaxValue; ExpectedType = [ushort] } #Binary @{ Script = "0b0us"; ExpectedValue = "0"; ExpectedType = [ushort] } @{ Script = "0b10us"; ExpectedValue = "2"; ExpectedType = [ushort] } @@ -1119,6 +1125,7 @@ foo``u{2195}abc #Hexadecimal @{ Script = "0x0ul"; ExpectedValue = "0"; ExpectedType = [ulong] } @{ Script = "0x41ul"; ExpectedValue = "65"; ExpectedType = [ulong] } + @{ Script = "0xFFFFFFFFFFFFFFFFul"; ExpectedValue = [ulong]::MaxValue; ExpectedType = [ulong] } #Binary @{ Script = "0b0ul"; ExpectedValue = "0"; ExpectedType = [ulong] } @{ Script = "0b10ul"; ExpectedValue = "2"; ExpectedType = [ulong] } From a7eae2413ec3428ff59cb2ecb6776b6bccdb3f5f Mon Sep 17 00:00:00 2001 From: Joel Sallow <32407840+vexx32@users.noreply.github.com> Date: Thu, 13 Feb 2020 00:07:59 -0500 Subject: [PATCH 2/4] :bug: Fix logic error --- src/System.Management.Automation/engine/parser/tokenizer.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/System.Management.Automation/engine/parser/tokenizer.cs b/src/System.Management.Automation/engine/parser/tokenizer.cs index c5727b75031..5e4d2d5e2f8 100644 --- a/src/System.Management.Automation/engine/parser/tokenizer.cs +++ b/src/System.Management.Automation/engine/parser/tokenizer.cs @@ -3727,7 +3727,8 @@ private static bool TryGetNumberValue( _ => strNum.Length < 16 ? 8 : 16 }; - if (strNum.Length == expectedLength) + // Add one to account for the added 0 from ScanNumberHelper + if (strNum.Length == expectedLength + 1) { strNum = strNum.Slice(1); } From dfe7feb292fd40a095736370fc8294e9bbd4ec96 Mon Sep 17 00:00:00 2001 From: Joel Sallow <32407840+vexx32@users.noreply.github.com> Date: Thu, 13 Feb 2020 00:30:55 -0500 Subject: [PATCH 3/4] :fire: Remove duplicated code segment --- .../engine/parser/tokenizer.cs | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/src/System.Management.Automation/engine/parser/tokenizer.cs b/src/System.Management.Automation/engine/parser/tokenizer.cs index 5e4d2d5e2f8..07144c72a8e 100644 --- a/src/System.Management.Automation/engine/parser/tokenizer.cs +++ b/src/System.Management.Automation/engine/parser/tokenizer.cs @@ -3708,26 +3708,6 @@ private static bool TryGetNumberValue( _ => strNum.Length < 16 ? 8 : 16 }; - if (strNum.Length == expectedLength) - { - strNum = strNum.Slice(1); - } - } - - - // If we're expecting a sign bit, remove the leading 0 added in ScanNumberHelper - if (!suffix.HasFlag(NumberSuffixFlags.Unsigned)) - { - var expectedLength = suffix switch - { - NumberSuffixFlags.SignedByte => 2, - NumberSuffixFlags.Short => 4, - NumberSuffixFlags.Long => 16, - // No suffix flag can mean int or long depending on input string length - _ => strNum.Length < 16 ? 8 : 16 - }; - - // Add one to account for the added 0 from ScanNumberHelper if (strNum.Length == expectedLength + 1) { strNum = strNum.Slice(1); From 6282858e48b968ba0fd1a6687e88f67fb9aa3ff9 Mon Sep 17 00:00:00 2001 From: Joel Sallow <32407840+vexx32@users.noreply.github.com> Date: Thu, 13 Feb 2020 23:57:18 -0500 Subject: [PATCH 4/4] :art: Fix code style --- src/System.Management.Automation/engine/parser/tokenizer.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/System.Management.Automation/engine/parser/tokenizer.cs b/src/System.Management.Automation/engine/parser/tokenizer.cs index 07144c72a8e..26b79cefdef 100644 --- a/src/System.Management.Automation/engine/parser/tokenizer.cs +++ b/src/System.Management.Automation/engine/parser/tokenizer.cs @@ -5013,7 +5013,9 @@ internal Token NextToken() c = GetChar(); if (strNum == null) - { return ScanGenericToken(c); } + { + return ScanGenericToken(c); + } } return NewToken(TokenKind.Exclaim);