Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
lassevk committed May 2, 2024
1 parent 94e6e67 commit b98ce77
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 6 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,13 @@ This is intended to be a new major release of [DiffLib](https://github.com/lasse
written in .NET 8, C# 12, with major performance improvements, based on new
types, such as `Span<T>` and similar.


Status
---

The current status of the project is "on hold". `(ReadOnly)Span<T>` is severely limited when dealing with recursive algorithms,
as there is no way to cache intermediate results.

A recursive enumerator can easily keep state like this, but since the entire stack unwinds and all local spans are
discarded, and cannot be temporarily stored between invocations, it seems I need a completely new way to think about
this recursive algorithm for this to work.
51 changes: 45 additions & 6 deletions src/DiffLib2/SegmentDiffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ public ref struct SegmentDiffer<T>
private ReadOnlySpan<T> _right;
private readonly IEqualityComparer<T> _comparer;

private int?[]? _leftHashCodes;
private int _leftHashCodesOffset;

private int?[]? _rightHashCodes;
private int _rightHashCodesOffset;

public SegmentDiffer(ReadOnlySpan<T> left, ReadOnlySpan<T> right, IEqualityComparer<T> comparer)
{
_left = left;
Expand Down Expand Up @@ -42,13 +48,19 @@ private bool TraverseForSubstrings(int leftStart, int leftEnd, int rightStart, i
{
for (int rightOffset = rightStart; rightOffset < rightEnd; rightOffset++)
{
if (LeftHashCode(leftOffset) != RightHashCode(rightOffset))
continue;

if (!_comparer.Equals(_left[leftOffset], _right[rightOffset]))
continue;

var length = 1;
int maxPossibleLength = Math.Min(leftEnd - leftStart - leftOffset, rightEnd - rightStart - rightOffset);
while (length < maxPossibleLength)
{
if (LeftHashCode(leftOffset + length) != RightHashCode(rightOffset + length))
break;

if (!_comparer.Equals(_left[leftOffset + length], _right[rightOffset + length]))
break;

Expand All @@ -67,25 +79,28 @@ private bool TraverseForSubstrings(int leftStart, int leftEnd, int rightStart, i
if (longestLength == 0)
{
segment = new DiffSegment<T>(_left[leftStart .. leftEnd], _right[rightStart .. rightEnd], false);
Advance(leftEnd - leftStart, rightEnd - rightStart);
Advance(segment.Left.Length, segment.Right.Length);
return true;
}

if (longestLeftStart == 0 && longestRightStart == 0)
{
segment = new DiffSegment<T>(_left[..longestLength], _right[..longestLength], true);
Advance(segment.Left.Length, segment.Right.Length);
return true;
}

if (longestLeftStart == 0)
{
segment = new DiffSegment<T>(_left[..0], _right[..longestRightStart], false);
Advance(segment.Left.Length, segment.Right.Length);
return true;
}

if (longestRightStart == 0)
{
segment = new DiffSegment<T>(_left[..longestLeftStart], _right[..0], false);
Advance(segment.Left.Length, segment.Right.Length);
return true;
}

Expand All @@ -98,7 +113,7 @@ private bool IsMatch(out DiffSegment<T> segment)
if (match > 0)
{
segment = new DiffSegment<T>(_left[..match], _right[..match], true);
Advance(match);
Advance(segment.Left.Length, segment.Right.Length);
return true;
}

Expand All @@ -111,7 +126,7 @@ private bool EitherSideIsEmpty(out DiffSegment<T> segment)
if (_left.Length == 0 || _right.Length == 0)
{
segment = new DiffSegment<T>(_left, _right, false);
Advance(_left.Length, _right.Length);
Advance(segment.Left.Length, segment.Right.Length);
return true;
}

Expand All @@ -125,11 +140,13 @@ private bool IsEmpty(out DiffSegment<T> segment)
return _left.Length == 0 && _right.Length == 0;
}

/// <summary>
/// Consumes the same number of elements from the front of both sides.
/// </summary>
/// <param name="length">Number of elements to drop from both the left and the right span.</param>
private void Advance(int length) => Advance(length, length);

/// <summary>
/// Consumes elements from the front of the left and right spans and moves the
/// hash-code cache offsets forward by the same amounts.
/// </summary>
/// <param name="leftLength">Number of elements to drop from the front of the left span.</param>
/// <param name="rightLength">Number of elements to drop from the front of the right span.</param>
private void Advance(int leftLength, int rightLength)
{
    // Slice each span exactly once; slicing twice would drop double the requested
    // number of elements while the cache offsets below only move by one step.
    _left = _left[leftLength ..];
    _right = _right[rightLength ..];

    // The hash-code lookups index their caches with (offset field + span index),
    // so the offset fields have to move in lockstep with the spans.
    _leftHashCodesOffset += leftLength;
    _rightHashCodesOffset += rightLength;
}

private int Match()
Expand All @@ -143,4 +160,26 @@ private int Match()
return index;
}
}

/// <summary>
/// Returns the hash code of the element at <paramref name="offset"/> in the current
/// left span, computing it on first request and caching it for later lookups.
/// </summary>
/// <param name="offset">Index into the current left span.</param>
/// <returns>
/// The hash code produced by the configured equality comparer, or 0 when the element is null.
/// </returns>
private int LeftHashCode(int offset)
{
    if (_leftHashCodes is null)
    {
        // The cache is created lazily and sized to the span as it is right now,
        // so the running offset restarts at zero relative to the current span.
        _leftHashCodes = new int?[_left.Length];
        _leftHashCodesOffset = 0;
    }

    int cacheIndex = _leftHashCodesOffset + offset;
    if (_leftHashCodes[cacheIndex] is int cached)
    {
        return cached;
    }

    // Hash with the same comparer that decides equality. Using the element's own
    // GetHashCode() would give different hashes to elements a custom comparer
    // treats as equal, and the hash pre-check would then reject real matches.
    T item = _left[offset];
    int hashCode = item is null ? 0 : _comparer.GetHashCode(item);
    _leftHashCodes[cacheIndex] = hashCode;
    return hashCode;
}

/// <summary>
/// Returns the hash code of the element at <paramref name="offset"/> in the current
/// right span, computing it on first request and caching it for later lookups.
/// </summary>
/// <param name="offset">Index into the current right span.</param>
/// <returns>
/// The hash code produced by the configured equality comparer, or 0 when the element is null.
/// </returns>
private int RightHashCode(int offset)
{
    if (_rightHashCodes is null)
    {
        // The cache is created lazily and sized to the span as it is right now,
        // so the running offset restarts at zero relative to the current span.
        _rightHashCodes = new int?[_right.Length];
        _rightHashCodesOffset = 0;
    }

    int cacheIndex = _rightHashCodesOffset + offset;
    if (_rightHashCodes[cacheIndex] is int cached)
    {
        return cached;
    }

    // Hash with the same comparer that decides equality. Using the element's own
    // GetHashCode() would give different hashes to elements a custom comparer
    // treats as equal, and the hash pre-check would then reject real matches.
    T item = _right[offset];
    int hashCode = item is null ? 0 : _comparer.GetHashCode(item);
    _rightHashCodes[cacheIndex] = hashCode;
    return hashCode;
}
}

0 comments on commit b98ce77

Please sign in to comment.