Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
#if !NET8_0_OR_GREATER
using System.Runtime.CompilerServices;
#endif

namespace System.Collections.Frozen
{
Expand Down Expand Up @@ -59,7 +57,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
int maxSubstringLength = Math.Min(minLength, MaxSubstringLengthLimit);
for (int count = 1; count <= maxSubstringLength; count++)
{
comparer.IsLeft = true;
comparer.Slicer = SliceLeft;
comparer.Count = count;

// For each index, get a uniqueness factor for the left-justified substrings.
Expand All @@ -71,8 +69,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
if (HasSufficientUniquenessFactor(set, uniqueStrings))
{
results = CreateAnalysisResults(
uniqueStrings, ignoreCase, minLength, maxLength, index, count,
static (string s, int index, int count) => s.AsSpan(index, count));
uniqueStrings, ignoreCase, minLength, maxLength, index, count, comparer.Slicer);
return true;
}
}
Expand All @@ -83,8 +80,8 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
// right-justified substrings, and so we also check right-justification.
if (minLength != maxLength)
{
// toggle the direction and re-use the comparer and hashset (HasSufficientUniquenessFactor clears it)
comparer.IsLeft = false;
// toggle the direction and re-use the comparer and HashSet (HasSufficientUniquenessFactor clears it)
comparer.Slicer = SliceRight;

// For each index, get a uniqueness factor for the right-justified substrings.
// If any is above our threshold, we're done.
Expand All @@ -96,8 +93,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
if (HasSufficientUniquenessFactor(set, uniqueStrings))
{
results = CreateAnalysisResults(
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count,
static (string s, int index, int count) => s.AsSpan(s.Length + index, count));
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count, comparer.Slicer);
return true;
}
}
Expand All @@ -110,7 +106,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
}

private static AnalysisResults CreateAnalysisResults(
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, GetSpan getSubstringSpan)
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, GetSpan slicer)
{
// Start off by assuming all strings are ASCII
bool allAsciiIfIgnoreCase = true;
Expand All @@ -124,12 +120,12 @@ private static AnalysisResults CreateAnalysisResults(
// actually perform the comparison as case-sensitive even if case-insensitive
// was requested, as there's nothing that would compare equally to the substring
// other than the substring itself.
bool canSwitchIgnoreCaseToCaseSensitive = ignoreCase;
bool canSwitchIgnoreCaseToCaseSensitive = true;
Copy link
Contributor Author

@IDisposable IDisposable Jun 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is demonstrably true because we're inside the if (ignoreCase) immediately above and could be a distinct PR, but it actually changes nothing but the readability and not hoping the Compiler/JIT groks this truth.


foreach (string s in uniqueStrings)
{
// Get the span for the substring.
ReadOnlySpan<char> substring = getSubstringSpan(s, index, count);
ReadOnlySpan<char> substring = slicer(s, index, count);

// If the substring isn't ASCII, bail out to return the results.
if (!IsAllAscii(substring))
Expand Down Expand Up @@ -263,25 +259,32 @@ public AnalysisResults(bool ignoreCase, bool allAsciiIfIgnoreCase, int hashIndex
public bool RightJustifiedSubstring => HashIndex < 0;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ReadOnlySpan<char> SliceLeft(string s, int index, int count) => s.AsSpan(index, count);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ReadOnlySpan<char> SliceRight(string s, int index, int count) => s.AsSpan(s.Length + index, count);

private abstract class SubstringComparer : IEqualityComparer<string>
{
public int Index;
public int Count;
public bool IsLeft;
public GetSpan Slicer = SliceLeft;

public abstract bool Equals(string? x, string? y);
public abstract int GetHashCode(string s);
}

private sealed class JustifiedSubstringComparer : SubstringComparer
{
public override bool Equals(string? x, string? y) => x.AsSpan(IsLeft ? Index : (x!.Length + Index), Count).SequenceEqual(y.AsSpan(IsLeft ? Index : (y!.Length + Index), Count));
public override int GetHashCode(string s) => Hashing.GetHashCodeOrdinal(s.AsSpan(IsLeft ? Index : (s.Length + Index), Count));
public override bool Equals(string? x, string? y) => Slicer(x!, Index, Count).SequenceEqual(Slicer(y!, Index, Count));
Copy link
Contributor Author

@IDisposable IDisposable Jun 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we can just call Slicer, which no longer has an IsLeft conditional test inside a hot loop, so calling through to the GetSpan-like method that was set on lines 60 (for the IsLeft == true logic) and 84 (for the IsLeft == false logic). This (to my reasoning) will result in much better low-level branch-prediction and since the actual delegate bodies are [MethodImpl(MethodImplOptions.AggressiveInlining)] they should be inlined stubs.

We also get to reuse those stubs when we do the construction (instead of "similar" ones coded up for the calls to CreateAnalysisResults)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not confident about the various not-null hints ! that I injected, but I think they're right, if not overkill. Not sure it really would affect the code generated and welcome input on that from low-level ninjas

public override int GetHashCode(string s) => Hashing.GetHashCodeOrdinal(Slicer(s, Index, Count));
}

private sealed class JustifiedCaseInsensitiveSubstringComparer : SubstringComparer
{
public override bool Equals(string? x, string? y) => x.AsSpan(IsLeft ? Index : (x!.Length + Index), Count).Equals(y.AsSpan(IsLeft ? Index : (y!.Length + Index), Count), StringComparison.OrdinalIgnoreCase);
public override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCase(s.AsSpan(IsLeft ? Index : (s.Length + Index), Count));
public override bool Equals(string? x, string? y) => Slicer(x!, Index, Count).Equals(Slicer(y!, Index, Count), StringComparison.OrdinalIgnoreCase);
public override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCase(Slicer(s, Index, Count));
}
}
}