From 64b9927e13c297023d8110bcb24719be8747e981 Mon Sep 17 00:00:00 2001 From: Benjamin Hodgson <3857587+benjamin-hodgson@users.noreply.github.com> Date: Wed, 31 Jul 2024 18:33:07 +0000 Subject: [PATCH] Use long for locations. Fixes #161 --- Pidgin/ParseState.ComputeSourcePos.cs | 21 +++++++++++---- Pidgin/ParseState.cs | 38 ++++++++++++++------------- Pidgin/Parser.CurrentOffset.cs | 12 ++++++--- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/Pidgin/ParseState.ComputeSourcePos.cs b/Pidgin/ParseState.ComputeSourcePos.cs index e3a9794..00ca6ae 100644 --- a/Pidgin/ParseState.ComputeSourcePos.cs +++ b/Pidgin/ParseState.ComputeSourcePos.cs @@ -8,7 +8,7 @@ namespace Pidgin; public partial struct ParseState { - private SourcePosDelta ComputeSourcePosDeltaAt(int location) + private SourcePosDelta ComputeSourcePosDeltaAt(long location) { if (location < _lastSourcePosDeltaLocation) { @@ -27,12 +27,19 @@ private SourcePosDelta ComputeSourcePosDeltaAt(int location) } else if (ReferenceEquals(_sourcePosCalculator, DefaultConfiguration.Instance.SourcePosCalculator)) { + // expect delta to be small enough to fit in an int since we're subtracting + var delta = (int)(location - _lastSourcePosDeltaLocation); + // _sourcePosCalculator just increments the col - return new SourcePosDelta(_lastSourcePosDelta.Lines, _lastSourcePosDelta.Cols + location - _lastSourcePosDeltaLocation); + return new SourcePosDelta(_lastSourcePosDelta.Lines, _lastSourcePosDelta.Cols + delta); } var pos = _lastSourcePosDelta; - for (var i = _lastSourcePosDeltaLocation - _bufferStartLocation; i < location - _bufferStartLocation; i++) + + // expect start and end to be small enough to fit in an int since we're subtracting + var start = (int)(_lastSourcePosDeltaLocation - _bufferStartLocation); + var end = (int)(location - _bufferStartLocation); + for (var i = start; i < end; i++) { pos += _sourcePosCalculator(_span[i]); } @@ -40,13 +47,17 @@ private SourcePosDelta ComputeSourcePosDeltaAt(int location) return pos; } - private SourcePosDelta ComputeSourcePosAt_CharDefault(int location) + private SourcePosDelta ComputeSourcePosAt_CharDefault(long location) { + // expect start and end to be small enough to fit in an int since we're subtracting + var start = (int)(_lastSourcePosDeltaLocation - _bufferStartLocation); + var end = (int)(location - _lastSourcePosDeltaLocation); + // coerce _span to Span var input = MemoryMarshal.CreateSpan( ref Unsafe.As(ref MemoryMarshal.GetReference(_span)), _span.Length - ).Slice(_lastSourcePosDeltaLocation - _bufferStartLocation, location - _lastSourcePosDeltaLocation); + ).Slice(start, end); var lines = 0; var cols = 0; diff --git a/Pidgin/ParseState.cs b/Pidgin/ParseState.cs index e201b53..4e7430d 100644 --- a/Pidgin/ParseState.cs +++ b/Pidgin/ParseState.cs @@ -34,16 +34,16 @@ public ref partial struct ParseState private readonly ITokenStream? _stream; private readonly int _bufferChunkSize; - private TToken[]? _buffer; + private TToken[]? _buffer; // to return to the pool private ReadOnlySpan _span; - private int _keepFromLocation; // leftmost bookmark which hasn't been discarded - private int _bufferStartLocation; // how many tokens had been consumed up to the start of the buffer? - private int _currentIndex; + private long _keepFromLocation; // leftmost bookmark which hasn't been discarded + private long _bufferStartLocation; // how many tokens had been consumed up to the start of the buffer? + private int _currentIndex; // index into the _span private int _bufferedCount; private int _numberOfBookmarks; - private int _lastSourcePosDeltaLocation; + private long _lastSourcePosDeltaLocation; private SourcePosDelta _lastSourcePosDelta; internal ParseState(IConfiguration configuration, ReadOnlySpan span) @@ -104,7 +104,7 @@ internal ParseState(IConfiguration configuration, ITokenStream s /// Returns the total number of tokens which have been consumed. /// In other words, the current absolute offset of the input stream. /// - public int Location + public long Location { [MethodImpl(MethodImplOptions.AggressiveInlining)] get @@ -190,13 +190,6 @@ private void Buffer(int readAhead) if (readAheadTo >= _bufferedCount && _stream != null) { // we're about to read past the end of the current chunk. Pull a new chunk from the stream - var keepSeenLength = _keepFromLocation >= 0 - ? Location - _keepFromLocation - : 0; - var keepFrom = _currentIndex - keepSeenLength; - var keepLength = _bufferedCount - keepFrom; - var amountToRead = Math.Max(_bufferChunkSize, readAheadTo - _bufferedCount); - var newBufferLength = keepLength + amountToRead; /* _currentIndex * | @@ -204,7 +197,7 @@ private void Buffer(int readAhead) * keepFrom | | * | | | readAheadTo * | | | | - * abcdefghijklmnopqrstuvwxyz + * input: abcdefghijklmnopqrstuvwxyz * readAhead |-----------| * keepSeenLength |------| * keepLength |-------------| @@ -212,6 +205,14 @@ private void Buffer(int readAhead) * newBufferLength |------------------| */ + // expect keepSeenLength to be small enough to fit in an int since we're subtracting + var keepSeenLength = _keepFromLocation >= 0 + ? (int)(Location - _keepFromLocation) + : 0; + var keepFrom = _currentIndex - keepSeenLength; + var keepLength = _bufferedCount - keepFrom; + var amountToRead = Math.Max(_bufferChunkSize, readAheadTo - _bufferedCount); + var newBufferLength = keepLength + amountToRead; UpdateLastSourcePosDelta(); if (newBufferLength > _buffer!.Length) @@ -243,7 +244,7 @@ private void Buffer(int readAhead) /// Start buffering the input. /// The location of the bookmark. - public int Bookmark() + public long Bookmark() { if (_keepFromLocation < 0) { @@ -257,7 +258,7 @@ public int Bookmark() /// Stop buffering the input. /// The location of the bookmark. - public void DiscardBookmark(int bookmark) + public void DiscardBookmark(long bookmark) { if (bookmark < _keepFromLocation || bookmark > Location || _numberOfBookmarks <= 0) { @@ -274,9 +275,10 @@ public void DiscardBookmark(int bookmark) /// Return to a bookmark previously obtained from and discard it. /// The location of the bookmark. - public void Rewind(int bookmark) + public void Rewind(long bookmark) { - var delta = Location - bookmark; + // expect delta to be small enough to fit in an int since we're subtracting + var delta = (int)(Location - bookmark); if (delta > _currentIndex) { diff --git a/Pidgin/Parser.CurrentOffset.cs b/Pidgin/Parser.CurrentOffset.cs index 29dfc6f..2330159 100644 --- a/Pidgin/Parser.CurrentOffset.cs +++ b/Pidgin/Parser.CurrentOffset.cs @@ -5,13 +5,19 @@ public static partial class Parser /// /// A parser which returns the number of input tokens which have been consumed. /// - public static Parser CurrentOffset { get; } + public static Parser CurrentOffsetLong { get; } = new CurrentOffsetParser(); + + /// + /// A parser which returns the number of input tokens which have been consumed. + /// + public static Parser CurrentOffset { get; } + = CurrentOffsetLong.Select(x => (int)x); } -internal sealed class CurrentOffsetParser : Parser +internal sealed class CurrentOffsetParser : Parser { - public sealed override bool TryParse(ref ParseState state, ref PooledList> expecteds, out int result) + public sealed override bool TryParse(ref ParseState state, ref PooledList> expecteds, out long result) { result = state.Location; return true;