-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Closed
Labels
api-approved (API was approved in API review, it can be implemented), area-System.Buffers
Milestone
Description
EDIT: most up to date proposal can be found here: #28230 (comment)
Most recent API review comment: #28230 (comment)
If you are interested in the original proposal, click "details" below:
Details
To make @KrzysztofCwalina happy. :)
Networking protocols and parsers in particular use ASCII very frequently - even more so than UTF-8. It would be useful to introduce high-performance APIs that work very specifically on ASCII text without trying to interpret it as UTF-8.
using System;
using System.Buffers;
/*
* This work item proposes a new static class for performing operations
* on byte buffers assumed to contain ASCII text. For the purposes of
* these APIs, an ASCII byte / char is any value in the range [ 00 .. 7F ], inclusive;
* and ASCII text is a sequence of such values. Most APIs in this type will
* treat non-ASCII values as opaque data unless the API description specifies
* a different behavior.
*
* For APIs which take both a source and a destination buffer, the behavior of
* the method is undefined if the source and destination buffers overlap,
* unless the API description specifies otherwise. The behavior of all APIs
* is undefined if another thread mutates the buffers while these APIs are
* operating on them.
*
* All case conversion APIs are culture-unaware.
*/
namespace System.Buffers.Text
{
// Proposed static helper class for operating on buffers that are assumed to contain
// ASCII text. Every member below is a signature-only declaration (no method bodies):
// this is an API sketch, and the comment above each group of overloads states the
// intended contract for that group.
public static class Ascii
{
// Compares two ASCII buffers for equality. The *IgnoreCase variants treat [A-Z] and
// [a-z] as equal; the plain Equals variants do not.
// All non-ASCII bytes / chars are compared for pure binary equivalence.
public static bool Equals(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right);
public static bool Equals(ReadOnlySpan<char> left, ReadOnlySpan<char> right);
public static bool EqualsIgnoreCase(ReadOnlySpan<byte> left, ReadOnlySpan<byte> right);
public static bool EqualsIgnoreCase(ReadOnlySpan<char> left, ReadOnlySpan<char> right);
// Compares an ASCII byte buffer and an ASCII char buffer (or string) for equality. The
// *IgnoreCase variants treat [A-Z] and [a-z] as equal.
// Returns false if the byte buffer contains any non-ASCII data or if the char buffer
// contains any element in the range [ 0080 .. FFFF ], because there is no way to know
// which encoding to use for a transcode-then-compare operation.
public static bool Equals(ReadOnlySpan<byte> left, ReadOnlySpan<char> right);
public static bool Equals(ReadOnlySpan<byte> left, string right);
public static bool EqualsIgnoreCase(ReadOnlySpan<byte> left, ReadOnlySpan<char> right);
public static bool EqualsIgnoreCase(ReadOnlySpan<byte> left, string right);
// Searches for the first occurrence of the target substring (value) within the search
// space (text). The *IgnoreCase variant treats [A-Z] and [a-z] as equal. All non-ASCII
// bytes are compared for pure binary equivalence.
// Returns the index at which the first match is found, or -1 if there is no match.
// ADDENDUM: Also assume there are *Last equivalents of the following.
public static int IndexOf(ReadOnlySpan<byte> text, ReadOnlySpan<byte> value);
public static int IndexOfIgnoreCase(ReadOnlySpan<byte> text, ReadOnlySpan<byte> value);
// Searches for the first occurrence of the target substring (value, given as chars or a
// string) within the byte search space (text). The *IgnoreCase variants treat [A-Z] and
// [a-z] as equal.
// Returns the index at which the first match is found, or -1 if there is no match.
// If the target contains any non-ASCII chars ([ 0080 .. FFFF ]), the search is assumed
// to have failed, and the method returns -1.
// ADDENDUM: Also assume there are *Last equivalents of the following.
public static int IndexOf(ReadOnlySpan<byte> text, ReadOnlySpan<char> value);
public static int IndexOf(ReadOnlySpan<byte> text, string value);
public static int IndexOfIgnoreCase(ReadOnlySpan<byte> text, ReadOnlySpan<char> value);
public static int IndexOfIgnoreCase(ReadOnlySpan<byte> text, string value);
// Given a buffer, returns the index of the first element in the buffer which is
// non-ASCII, or -1 if the buffer is empty or all-ASCII. The bool-returning IsAllAscii
// methods are convenience shortcuts that perform the same check.
public static int GetIndexOfFirstNonAsciiByte(ReadOnlySpan<byte> buffer);
public static int GetIndexOfFirstNonAsciiChar(ReadOnlySpan<char> buffer);
public static bool IsAllAscii(ReadOnlySpan<byte> buffer);
public static bool IsAllAscii(ReadOnlySpan<char> buffer);
// Returns true iff the provided byte is an ASCII byte, i.e., in the range [ 00 .. 7F ];
// or if the provided char is in the range [ 0000 .. 007F ].
public static bool IsAsciiByte(byte value);
public static bool IsAsciiChar(char value);
// Copies source to destination, converting [A-Z] -> [a-z] (ToLowerInvariant) or
// [a-z] -> [A-Z] (ToUpperInvariant) during the copy. All values outside [A-Za-z] -
// including non-ASCII values - are unchanged during the copy.
//
// If source.Length <= destination.Length, succeeds and returns source.Length (the number
// of elements copied: bytes for the byte overloads, chars for the char overloads).
// If source.Length > destination.Length, returns -1.
public static int ToLowerInvariant(ReadOnlySpan<byte> source, Span<byte> destination);
public static int ToLowerInvariant(ReadOnlySpan<char> source, Span<char> destination);
public static int ToUpperInvariant(ReadOnlySpan<byte> source, Span<byte> destination);
public static int ToUpperInvariant(ReadOnlySpan<char> source, Span<char> destination);
// Performs case conversion ([A-Z] -> [a-z] or vice versa) in-place, overwriting the
// provided buffer. All values outside [A-Za-z] - including non-ASCII values - are unchanged.
public static void ToLowerInvariantInPlace(Span<byte> buffer);
public static void ToLowerInvariantInPlace(Span<char> buffer);
public static void ToUpperInvariantInPlace(Span<byte> buffer);
public static void ToUpperInvariantInPlace(Span<char> buffer);
// Performs case conversion on a single value, converting [A-Z] -> [a-z] or vice versa.
// All values outside [A-Za-z] - including non-ASCII values - are unchanged.
// NOTE(review): the two char overloads below are declared as returning byte. A byte
// cannot hold a char above 00FF, so such a value could not be returned "unchanged" as
// the contract above requires. Presumably the return type was meant to be char -
// TODO confirm before implementing.
public static byte ToLowerInvariant(byte value);
public static byte ToLowerInvariant(char value);
public static byte ToUpperInvariant(byte value);
public static byte ToUpperInvariant(char value);
// Returns a hash code for the provided buffer suitable for use in a dictionary or
// other keyed collection. For the GetHashCodeOrdinalIgnoreCase methods, the values
// [A-Z] and [a-z] are treated as equivalent during hash code computation. All non-ASCII
// values are treated as opaque data. The hash code is randomized but is not guaranteed
// to implement any particular algorithm, nor is it guaranteed to be a member of the same
// PRF family as other GetHashCode routines in the framework.
public static int GetHashCode(ReadOnlySpan<byte> buffer);
public static int GetHashCode(ReadOnlySpan<char> buffer);
public static int GetHashCodeOrdinalIgnoreCase(ReadOnlySpan<byte> buffer);
public static int GetHashCodeOrdinalIgnoreCase(ReadOnlySpan<char> buffer);
// Widens an ASCII buffer to UTF-16 or narrows a UTF-16 buffer to ASCII.
// Returns OperationStatus.InvalidData if the source buffer contains a non-ASCII byte
// or a char in the range [ 0080 .. FFFF ].
// OPEN QUESTION: Should we have an equivalent with Latin-1 semantics? Probably doesn't
// belong on the ASCII class if we do that.
// NOTE(review): "NarrowFromUTf16" capitalizes the 'T', unlike its counterpart
// "WidenToUtf16". This looks like a typo for "NarrowFromUtf16" - TODO confirm the
// intended name before implementing.
public static OperationStatus WidenToUtf16(ReadOnlySpan<byte> source, Span<char> destination, out int bytesRead, out int charsWritten);
public static OperationStatus NarrowFromUTf16(ReadOnlySpan<char> source, Span<byte> destination, out int charsRead, out int bytesWritten);
// Widens the provided buffer to UTF-16, then returns a new string instance from it.
// Throws ArgumentException if the buffer contains non-ASCII data. If the desired behavior
// is character substitution instead of throwing an exception, consider instead using
// Encoding.ASCII.GetString.
public static string ToString(ReadOnlySpan<byte> buffer);
// Trims only ASCII whitespace values from the buffer, returning the trimmed buffer.
// Going by the names, Trim / TrimStart / TrimEnd differ in which end(s) of the buffer
// are trimmed.
public static ReadOnlySpan<byte> Trim(ReadOnlySpan<byte> buffer);
public static ReadOnlySpan<char> Trim(ReadOnlySpan<char> buffer);
public static ReadOnlySpan<byte> TrimStart(ReadOnlySpan<byte> buffer);
public static ReadOnlySpan<char> TrimStart(ReadOnlySpan<char> buffer);
public static ReadOnlySpan<byte> TrimEnd(ReadOnlySpan<byte> buffer);
public static ReadOnlySpan<char> TrimEnd(ReadOnlySpan<char> buffer);
// Similar to Trim, but instead of returning a sliced span, returns the Range within the
// buffer that remains after trimming. Useful for scenarios where the resulting
// ReadOnlySpan<byte> needs to be mapped back to some container data structure.
// Example: Assume ReadOnlyMemory<byte> mem = GetSomeAsciiData();
// Then ReadOnlyMemory<byte> trimmed = mem[Ascii.GetTrimmedRange(mem.Span)];
public static Range GetTrimmedRange(ReadOnlySpan<byte> buffer);
public static Range GetTrimmedRange(ReadOnlySpan<char> buffer);
}
}
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
api-approved (API was approved in API review, it can be implemented), area-System.Buffers