From fd9f55612a271500498e6288a05be22cad774193 Mon Sep 17 00:00:00 2001 From: Steve Lee Date: Tue, 3 Dec 2019 13:20:47 -0800 Subject: [PATCH 1/3] Update calculation of char width to respect CJK chars correctly --- .../host/msh/ConsoleControl.cs | 58 ++++++++++--------- .../FormatAndOutput/common/ILineOutput.cs | 32 +++++++++- .../Format-List.Tests.ps1 | 20 +++++++ .../Format-Table.Tests.ps1 | 19 ++++++ 4 files changed, 100 insertions(+), 29 deletions(-) diff --git a/src/Microsoft.PowerShell.ConsoleHost/host/msh/ConsoleControl.cs b/src/Microsoft.PowerShell.ConsoleHost/host/msh/ConsoleControl.cs index 75697a5f44d..f4576c0ed51 100644 --- a/src/Microsoft.PowerShell.ConsoleHost/host/msh/ConsoleControl.cs +++ b/src/Microsoft.PowerShell.ConsoleHost/host/msh/ConsoleControl.cs @@ -2781,7 +2781,36 @@ internal static int LengthInBufferCells(string str, int offset, bool checkEscape } } - return str.Length - offset - escapeSequenceAdjustment; + int length = 0; + foreach (char c in str) + { + length += LengthInBufferCells(c); + } + + return length - offset - escapeSequenceAdjustment; + } + + internal static int LengthInBufferCells(char c) + { + // The following is based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c + // which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt + + bool isWide = c >= 0x1100 && + (c <= 0x115f || /* Hangul Jamo init. consonants */ + c == 0x2329 || c == 0x232a || + (c >= 0x2e80 && c <= 0xa4cf && + c != 0x303f) || /* CJK ... Yi */ + (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ + (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ + (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */ + (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ + (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ + (c >= 0xffe0 && c <= 0xffe6)); + // We can ignore these ranges because .Net strings use surrogate pairs + // for this range and we do not handle surrogage pairs. 
+ // (c >= 0x20000 && c <= 0x2fffd) || + // (c >= 0x30000 && c <= 0x3fffd) + return 1 + (isWide ? 1 : 0); } #if !UNIX @@ -2950,33 +2979,6 @@ private static HostException CreateHostException( #endregion helper - #region - - internal static int LengthInBufferCells(char c) - { - // The following is based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c - // which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt - - bool isWide = c >= 0x1100 && - (c <= 0x115f || /* Hangul Jamo init. consonants */ - c == 0x2329 || c == 0x232a || - (c >= 0x2e80 && c <= 0xa4cf && - c != 0x303f) || /* CJK ... Yi */ - (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ - (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ - (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */ - (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ - (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ - (c >= 0xffe0 && c <= 0xffe6)); - // We can ignore these ranges because .Net strings use surrogate pairs - // for this range and we do not handle surrogage pairs. - // (c >= 0x20000 && c <= 0x2fffd) || - // (c >= 0x30000 && c <= 0x3fffd) - return 1 + (isWide ? 
1 : 0); - } - - #endregion - #region SendInput internal static void MimicKeyPress(INPUT[] inputs) diff --git a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs index ea67c1bf11e..6e81e248a9e 100644 --- a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs +++ b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs @@ -29,7 +29,14 @@ internal virtual int Length(string str) internal virtual int Length(string str, int offset) { - return str.Length - offset; + int length = 0; + // Measure only the characters at or after offset; a wide (CJK) character occupies two cells. + for (int i = offset; i < str.Length; i++) + { + length += LengthInBufferCells(str[i]); + } + + return length; } internal virtual int Length(char character) { return 1; } @@ -58,6 +65,29 @@ internal virtual int GetTailSplitLength(string str, int offset, int displayCells #region Helpers + protected static int LengthInBufferCells(char c) + { + // The following is based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c + // which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt + + bool isWide = c >= 0x1100 && + (c <= 0x115f || /* Hangul Jamo init. consonants */ + c == 0x2329 || c == 0x232a || + (c >= 0x2e80 && c <= 0xa4cf && + c != 0x303f) || /* CJK ... Yi */ + (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ + (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ + (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */ + (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ + (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ + (c >= 0xffe0 && c <= 0xffe6)); + // We can ignore these ranges because .Net strings use surrogate pairs + // for this range and we do not handle surrogage pairs. + // (c >= 0x20000 && c <= 0x2fffd) || + // (c >= 0x30000 && c <= 0x3fffd) + return 1 + (isWide ? 
1 : 0); + } + /// /// Given a string and a number of display cells, it computes how many /// characters would fit starting from the beginning or end of the string. diff --git a/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-List.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-List.Tests.ps1 index 0d3056d9150..4ebba15a183 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-List.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-List.Tests.ps1 @@ -159,4 +159,24 @@ Describe "Format-List DRT basic functionality" -Tags "CI" { $result | Should -Match "Name\s*:\s*test.txt" $result | Should -Match "Length\s*:\s*5" } + + It "Format-List should work with double byte wide chars" { + $obj = [pscustomobject]@{ + "哇" = "62"; + "dbda" = "KM"; + "消息" = "千" + } + + $expected = @" + +哇 : 62 +dbda : KM +消息 : 千 + + + +"@ + + $obj | Format-List | Out-String | Should -BeExactly $expected + } } diff --git a/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-Table.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-Table.Tests.ps1 index 8a86d146f7b..f2132aa280f 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-Table.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Utility/Format-Table.Tests.ps1 @@ -812,4 +812,23 @@ A Name B $output = [pscustomobject] @{ one = 1 } | Format-Table @{ l='one'; e='one'; width=10; alignment='center' } | Out-String $output.Replace("`r","").Replace(" ",".").Replace("`n","^") | Should -BeExactly $expectedTable.Replace("`r","").Replace(" ",".").Replace("`n","^") } + + It "Should be formatted correctly with double byte wide chars" { + $obj = [pscustomobject]@{ + "哇" = "62"; + "dbda" = "KM"; + "消息" = "千" + } + + $expected = @" + +哇 dbda 消息 +-- ---- ---- +62 KM 千 + + +"@ + + $obj | Format-Table | Out-String | Should -BeExactly $expected + } } From 2a614c73e41eb088c84f6ef55a8296a06867bb11 Mon Sep 17 00:00:00 2001 From: Steve 
Lee Date: Tue, 3 Dec 2019 17:05:35 -0800 Subject: [PATCH 2/3] fix CodeFactor issues --- .../FormatAndOutput/common/ILineOutput.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs index 6e81e248a9e..8dc21c416c1 100644 --- a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs +++ b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs @@ -69,7 +69,6 @@ protected static int LengthInBufferCells(char c) { // The following is based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c // which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt - bool isWide = c >= 0x1100 && (c <= 0x115f || /* Hangul Jamo init. consonants */ c == 0x2329 || c == 0x232a || @@ -81,6 +80,7 @@ protected static int LengthInBufferCells(char c) (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ (c >= 0xffe0 && c <= 0xffe6)); + // We can ignore these ranges because .Net strings use surrogate pairs // for this range and we do not handle surrogage pairs. 
// (c >= 0x20000 && c <= 0x2fffd) || From 294a479900849da6407bb8465857cfac5efea63e Mon Sep 17 00:00:00 2001 From: Steve Lee Date: Wed, 4 Dec 2019 10:03:46 -0800 Subject: [PATCH 3/3] address Ilya's feedback --- .../host/msh/ConsoleControl.cs | 16 ++++++++-------- .../FormatAndOutput/common/ILineOutput.cs | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Microsoft.PowerShell.ConsoleHost/host/msh/ConsoleControl.cs b/src/Microsoft.PowerShell.ConsoleHost/host/msh/ConsoleControl.cs index f4576c0ed51..45d95ff7920 100644 --- a/src/Microsoft.PowerShell.ConsoleHost/host/msh/ConsoleControl.cs +++ b/src/Microsoft.PowerShell.ConsoleHost/host/msh/ConsoleControl.cs @@ -2794,18 +2794,18 @@ internal static int LengthInBufferCells(char c) { // The following is based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c // which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt - bool isWide = c >= 0x1100 && (c <= 0x115f || /* Hangul Jamo init. consonants */ c == 0x2329 || c == 0x232a || - (c >= 0x2e80 && c <= 0xa4cf && + ((uint)(c - 0x2e80) <= (0xa4cf - 0x2e80) && c != 0x303f) || /* CJK ... 
Yi */ - (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ - (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ - (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */ - (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ - (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ - (c >= 0xffe0 && c <= 0xffe6)); + ((uint)(c - 0xac00) <= (0xd7a3 - 0xac00)) || /* Hangul Syllables */ + ((uint)(c - 0xf900) <= (0xfaff - 0xf900)) || /* CJK Compatibility Ideographs */ + ((uint)(c - 0xfe10) <= (0xfe19 - 0xfe10)) || /* Vertical forms */ + ((uint)(c - 0xfe30) <= (0xfe6f - 0xfe30)) || /* CJK Compatibility Forms */ + ((uint)(c - 0xff00) <= (0xff60 - 0xff00)) || /* Fullwidth Forms */ + ((uint)(c - 0xffe0) <= (0xffe6 - 0xffe0))); + // We can ignore these ranges because .Net strings use surrogate pairs // for this range and we do not handle surrogage pairs. // (c >= 0x20000 && c <= 0x2fffd) || diff --git a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs index 8dc21c416c1..880a1c9f5cc 100644 --- a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs +++ b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs @@ -72,14 +72,14 @@ protected static int LengthInBufferCells(char c) bool isWide = c >= 0x1100 && (c <= 0x115f || /* Hangul Jamo init. consonants */ c == 0x2329 || c == 0x232a || - (c >= 0x2e80 && c <= 0xa4cf && + ((uint)(c - 0x2e80) <= (0xa4cf - 0x2e80) && c != 0x303f) || /* CJK ... 
Yi */ - (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ - (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ - (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */ - (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ - (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ - (c >= 0xffe0 && c <= 0xffe6)); + ((uint)(c - 0xac00) <= (0xd7a3 - 0xac00)) || /* Hangul Syllables */ + ((uint)(c - 0xf900) <= (0xfaff - 0xf900)) || /* CJK Compatibility Ideographs */ + ((uint)(c - 0xfe10) <= (0xfe19 - 0xfe10)) || /* Vertical forms */ + ((uint)(c - 0xfe30) <= (0xfe6f - 0xfe30)) || /* CJK Compatibility Forms */ + ((uint)(c - 0xff00) <= (0xff60 - 0xff00)) || /* Fullwidth Forms */ + ((uint)(c - 0xffe0) <= (0xffe6 - 0xffe0))); // We can ignore these ranges because .Net strings use surrogate pairs // for this range and we do not handle surrogage pairs.