Skip to content

Commit

Permalink
Fix several bugs around the 'byteString' family of Builders (#671)
Browse files Browse the repository at this point in the history
* Fix several bugs around the 'byteString' family of Builders

* Add Note [byteStringCopyStep and wrappedBytesCopyStep]

This makes explicit the reasoning for in what sense
"ensur[ing] that the common case is not recursive" is expected to
possibly "yield[] better code."
  • Loading branch information
clyring authored and Bodigrim committed Oct 15, 2024
1 parent 1f3d4cc commit c0ad4c8
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 41 deletions.
93 changes: 71 additions & 22 deletions Data/ByteString/Builder/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ import Data.Semigroup (Semigroup(..))
import Data.List.NonEmpty (NonEmpty(..))

import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Internal.Type as S
import qualified Data.ByteString.Lazy.Internal as L
import qualified Data.ByteString.Short.Internal as Sh
Expand Down Expand Up @@ -819,24 +820,24 @@ ensureFree minFree =
| ope `minusPtr` op < minFree = return $ bufferFull minFree op k
| otherwise = k br

-- | Copy the bytes from a 'BufferRange' into the output stream.
wrappedBytesCopyStep :: BufferRange -- ^ Input 'BufferRange'.
-- | Copy the bytes from a 'S.StrictByteString' into the output stream.
wrappedBytesCopyStep :: S.StrictByteString -- ^ Input 'S.StrictByteString'.
-> BuildStep a -> BuildStep a
wrappedBytesCopyStep (BufferRange ip0 ipe) k =
go ip0
-- See Note [byteStringCopyStep and wrappedBytesCopyStep]
wrappedBytesCopyStep bs0 k =
go bs0
where
go !ip (BufferRange op ope)
go !bs@(S.BS ifp inpRemaining) (BufferRange op ope)
| inpRemaining <= outRemaining = do
copyBytes op ip inpRemaining
S.unsafeWithForeignPtr ifp $ \ip -> copyBytes op ip inpRemaining
let !br' = BufferRange (op `plusPtr` inpRemaining) ope
k br'
| otherwise = do
copyBytes op ip outRemaining
let !ip' = ip `plusPtr` outRemaining
return $ bufferFull 1 ope (go ip')
S.unsafeWithForeignPtr ifp $ \ip -> copyBytes op ip outRemaining
let !bs' = S.unsafeDrop outRemaining bs
return $ bufferFull 1 ope (go bs')
where
outRemaining = ope `minusPtr` op
inpRemaining = ipe `minusPtr` ip


-- Strict ByteStrings
Expand All @@ -857,7 +858,7 @@ byteStringThreshold :: Int -> S.StrictByteString -> Builder
byteStringThreshold maxCopySize =
\bs -> builder $ step bs
where
step bs@(S.BS _ len) !k br@(BufferRange !op _)
step bs@(S.BS _ len) k br@(BufferRange !op _)
| len <= maxCopySize = byteStringCopyStep bs k br
| otherwise = return $ insertChunk op bs k

Expand All @@ -871,21 +872,69 @@ byteStringThreshold maxCopySize =
byteStringCopy :: S.StrictByteString -> Builder
byteStringCopy = \bs -> builder $ byteStringCopyStep bs

{-
Note [byteStringCopyStep and wrappedBytesCopyStep]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A Builder that copies the contents of an arbitrary ByteString needs a
recursive loop, since the bytes to be copied might not fit into the
first few chunk buffers provided by the driver. That loop is
implemented in 'wrappedBytesCopyStep'. But we also have a
non-recursive wrapper, 'byteStringCopyStep', which performs exactly
the first iteration of that loop, falling back to 'wrappedBytesCopyStep'
if a chunk boundary is reached before the entire ByteString is copied.
This is very strange! Why do we do this? Perhaps mostly for
historical reasons. But sadly, changing this to use a single
recursive loop regresses the benchmark 'foldMap byteStringCopy' by
about 30% as of 2024, in one of two ways:
1. If the continuation 'k' is taken as an argument of the
inner copying loop, it remains an unknown function call.
So for each bytestring copied, that continuation must be
entered later via a gen-apply function, which incurs dozens
of cycles of extra overhead.
2. If the continuation 'k' is lifted out of the inner copying
loop, it becomes a free variable. And after a bit of
inlining, there will be no unknown function call. But, if
the continuation function has any free variables, these
become free variables of the inner copying loop, which
prevent the loop from floating out. (In the actual
benchmark, the tail of the list of bytestrings to copy is
such a free variable of the continuation.) As a result,
the inner copying loop becomes a function closure object
rather than a top-level function. And that means a new
inner-copying-loop function-closure-object must be
allocated on the heap for every bytestring copied, which
is expensive.
In theory, GHC's late-lambda-lifting pass can clean this up by
abstracting over the problematic free variables. But for some
unknown reason (perhaps a bug in ghc-9.10.1) this optimization
does not fire on the relevant benchmark code, even with a
sufficiently high value of -fstg-lift-lams-rec-args.
Alternatively, it is possible to avoid recursion altogether by
requesting that the next chunk be large enough to accommodate the
entire remainder of the input when a chunk boundary is reached.
But:
* For very large ByteStrings, this may incur unwanted latency.
* Large next-chunk-size requests have caused breakage downstream
in the past. See also https://github.com/yesodweb/wai/issues/894
-}

{-# INLINE byteStringCopyStep #-}
byteStringCopyStep :: S.StrictByteString -> BuildStep a -> BuildStep a
byteStringCopyStep (S.BS ifp isize) !k0 br0@(BufferRange op ope)
-- Ensure that the common case is not recursive and therefore yields
-- better code.
| op' <= ope = do copyBytes op ip isize
touchForeignPtr ifp
k0 (BufferRange op' ope)
| otherwise = wrappedBytesCopyStep (BufferRange ip ipe) k br0
-- See Note [byteStringCopyStep and wrappedBytesCopyStep]
byteStringCopyStep bs@(S.BS ifp isize) k br@(BufferRange op ope)
| isize <= osize = do
S.unsafeWithForeignPtr ifp $ \ip -> copyBytes op ip isize
k (BufferRange op' ope)
| otherwise = wrappedBytesCopyStep bs k br
where
osize = ope `minusPtr` op
op' = op `plusPtr` isize
ip = unsafeForeignPtrToPtr ifp
ipe = ip `plusPtr` isize
k br = do touchForeignPtr ifp -- input consumed: OK to release here
k0 br

-- | Construct a 'Builder' that always inserts the 'S.StrictByteString'
-- directly as a chunk.
Expand Down
39 changes: 24 additions & 15 deletions tests/Properties/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ tests =
, testProperty "toChunks . fromChunks" $
\xs -> B.toChunks (B.fromChunks xs) === filter (/= mempty) xs
, testProperty "append lazy" $
\(toElem -> c) -> B.head (B.singleton c <> undefined) === c
\(toElem -> c) -> B.head (B.singleton c <> tooStrictErr) === c
, testProperty "compareLength 1" $
\x -> B.compareLength x (B.length x) === EQ
, testProperty "compareLength 2" $
Expand All @@ -379,13 +379,13 @@ tests =
, testProperty "compareLength 5" $
\x (intToIndexTy -> n) -> B.compareLength x n === compare (B.length x) n
, testProperty "dropEnd lazy" $
\(toElem -> c) -> B.take 1 (B.dropEnd 1 (B.singleton c <> B.singleton c <> B.singleton c <> undefined)) === B.singleton c
\(toElem -> c) -> B.take 1 (B.dropEnd 1 (B.singleton c <> B.singleton c <> B.singleton c <> tooStrictErr)) === B.singleton c
, testProperty "dropWhileEnd lazy" $
\(toElem -> c) -> B.take 1 (B.dropWhileEnd (const False) (B.singleton c <> undefined)) === B.singleton c
\(toElem -> c) -> B.take 1 (B.dropWhileEnd (const False) (B.singleton c <> tooStrictErr)) === B.singleton c
, testProperty "breakEnd lazy" $
\(toElem -> c) -> B.take 1 (fst $ B.breakEnd (const True) (B.singleton c <> undefined)) === B.singleton c
\(toElem -> c) -> B.take 1 (fst $ B.breakEnd (const True) (B.singleton c <> tooStrictErr)) === B.singleton c
, testProperty "spanEnd lazy" $
\(toElem -> c) -> B.take 1 (fst $ B.spanEnd (const False) (B.singleton c <> undefined)) === B.singleton c
\(toElem -> c) -> B.take 1 (fst $ B.spanEnd (const False) (B.singleton c <> tooStrictErr)) === B.singleton c
#endif

, testProperty "length" $
Expand Down Expand Up @@ -604,12 +604,21 @@ tests =
# ifdef BYTESTRING_LAZY
-- Don't use (===) in these laziness tests:
-- We don't want printing the test case to fail!
, testProperty "zip is lazy" $ lazyZipTest $
\x y -> B.zip x y == zip (B.unpack x) (B.unpack y)
, testProperty "zipWith is lazy" $ \f -> lazyZipTest $
\x y -> (B.zipWith f x y :: [Int]) == zipWith f (B.unpack x) (B.unpack y)
, testProperty "packZipWith is lazy" $ \f -> lazyZipTest $
\x y -> B.unpack (B.packZipWith ((toElem .) . f) x y) == zipWith ((toElem .) . f) (B.unpack x) (B.unpack y)
, testProperty "zip is lazy in the longer input" $ zipLazyInLongerInputTest $
\x y -> B.zip x y == zip (B.unpack x) (B.unpack y)
, testProperty "zipWith is lazy in the longer input" $
\f -> zipLazyInLongerInputTest $
\x y -> (B.zipWith f x y :: [Int]) == zipWith f (B.unpack x) (B.unpack y)
, testProperty "packZipWith is lazy in the longer input" $
\f -> zipLazyInLongerInputTest $
\x y -> B.unpack (B.packZipWith ((toElem .) . f) x y) == zipWith ((toElem .) . f) (B.unpack x) (B.unpack y)
, testProperty "zip is maximally lazy" $ \x y ->
zip (B.unpack x) (B.unpack y) `List.isPrefixOf`
B.zip (x <> tooStrictErr) (y <> tooStrictErr)
, testProperty "zipWith is maximally lazy" $ \f x y ->
zipWith f (B.unpack x) (B.unpack y) `List.isPrefixOf`
B.zipWith @Int f (x <> tooStrictErr) (y <> tooStrictErr)
-- (It's not clear if packZipWith is required to be maximally lazy.)
# endif
, testProperty "unzip" $
\(fmap (toElem *** toElem) -> xs) -> (B.unpack *** B.unpack) (B.unzip xs) === unzip xs
Expand Down Expand Up @@ -807,15 +816,15 @@ readIntegerUnsigned xs = case readMaybe ys of
#endif

#ifdef BYTESTRING_LAZY
lazyZipTest
zipLazyInLongerInputTest
:: Testable prop
=> (BYTESTRING_TYPE -> BYTESTRING_TYPE -> prop)
-> BYTESTRING_TYPE -> BYTESTRING_TYPE -> Property
lazyZipTest fun = \x0 y0 -> let
zipLazyInLongerInputTest fun = \x0 y0 -> let
msg = "Input chunks are: " ++ show (B.toChunks x0, B.toChunks y0)
(x, y) | B.length x0 <= B.length y0
= (x0, y0 <> error "too strict")
= (x0, y0 <> tooStrictErr)
| otherwise
= (x0 <> error "too strict", y0)
= (x0 <> tooStrictErr, y0)
in counterexample msg (fun x y)
#endif
6 changes: 6 additions & 0 deletions tests/QuickCheckUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module QuickCheckUtils
, CByteString(..)
, Sqrt(..)
, int64OK
, tooStrictErr
) where

import Test.Tasty.QuickCheck
Expand All @@ -19,6 +20,7 @@ import Data.Int
import System.IO
import Foreign.C (CChar)
import GHC.TypeLits (TypeError, ErrorMessage(..))
import GHC.Stack (withFrozenCallStack, HasCallStack)

import qualified Data.ByteString.Short as SB
import qualified Data.ByteString as P
Expand Down Expand Up @@ -134,3 +136,7 @@ instance {-# OVERLAPPING #-}
-- defined in "QuickCheckUtils".
int64OK :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property
int64OK f = propertyForAllShrinkShow arbitrary shrink (\v -> [show v]) f

tooStrictErr :: forall a. HasCallStack => a
tooStrictErr = withFrozenCallStack $
error "A lazy sub-expression was unexpectedly evaluated"
50 changes: 46 additions & 4 deletions tests/builder/Data/ByteString/Builder/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import Control.Monad.Trans.State (StateT, evalStateT, evalState, put,
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Writer (WriterT, execWriterT, tell)

import Foreign (minusPtr)
import Foreign (minusPtr, castPtr, ForeignPtr, withForeignPtr, Int64)

import Data.Char (chr)
import Data.Bits ((.|.), shiftL)
Expand All @@ -40,7 +40,6 @@ import Data.ByteString.Builder.Prim.TestUtils

import Control.Exception (evaluate)
import System.IO (openTempFile, hPutStr, hClose, hSetBinaryMode, hSetEncoding, utf8, hSetNewlineMode, noNewlineTranslation)
import Foreign (ForeignPtr, withForeignPtr, castPtr)
import Foreign.C.String (withCString)
import Numeric (showFFloat)
import System.Posix.Internals (c_unlink)
Expand All @@ -50,7 +49,8 @@ import Test.Tasty.QuickCheck
( Arbitrary(..), oneof, choose, listOf, elements
, counterexample, ioProperty, Property, testProperty
, (===), (.&&.), conjoin, forAll, forAllShrink
, UnicodeString(..), NonNegative(..)
, UnicodeString(..), NonNegative(..), Positive(..)
, mapSize, (==>)
)
import QuickCheckUtils

Expand All @@ -70,7 +70,8 @@ tests =
testsASCII ++
testsFloating ++
testsChar8 ++
testsUtf8
testsUtf8 ++
[testLaziness]


------------------------------------------------------------------------------
Expand Down Expand Up @@ -981,3 +982,44 @@ testsUtf8 =
[ testBuilderConstr "charUtf8" charUtf8_list charUtf8
, testBuilderConstr "stringUtf8" (foldMap charUtf8_list) stringUtf8
]

testLaziness :: TestTree
testLaziness = testGroup "Builder laziness"
[ testProperty "byteString" $ mapSize (+ 10) $
\bs (Positive chunkSize) ->
let strategy = safeStrategy chunkSize chunkSize
lbs = toLazyByteStringWith strategy L.empty
(byteString bs <> tooStrictErr)
in (S.length bs > max chunkSize 8) ==> L.head lbs == S.head bs
, testProperty "byteStringCopy" $ mapSize (+ 10) $
\bs (Positive chunkSize) ->
let strategy = safeStrategy chunkSize chunkSize
lbs = toLazyByteStringWith strategy L.empty
(byteStringCopy bs <> tooStrictErr)
in (S.length bs > max chunkSize 8) ==> L.head lbs == S.head bs
, testProperty "byteStringInsert" $ mapSize (+ 10) $
\bs (Positive chunkSize) ->
let strategy = safeStrategy chunkSize chunkSize
lbs = toLazyByteStringWith strategy L.empty
(byteStringInsert bs <> tooStrictErr)
in L.take (fromIntegral @Int @Int64 (S.length bs)) lbs
== L.fromStrict bs
, testProperty "lazyByteString" $ mapSize (+ 10) $
\bs (Positive chunkSize) ->
let strategy = safeStrategy chunkSize chunkSize
lbs = toLazyByteStringWith strategy L.empty
(lazyByteString bs <> tooStrictErr)
in (L.length bs > fromIntegral @Int @Int64 (max chunkSize 8))
==> L.head lbs == L.head bs
, testProperty "shortByteString" $ mapSize (+ 10) $
\bs (Positive chunkSize) ->
let strategy = safeStrategy chunkSize chunkSize
lbs = toLazyByteStringWith strategy L.empty
(shortByteString bs <> tooStrictErr)
in (Sh.length bs > max chunkSize 8) ==> L.head lbs == Sh.head bs
, testProperty "flush" $ \recipe -> let
!(b, toLBS) = recipeComponents recipe
!lbs1 = toLazyByteString b
!lbs2 = L.take (L.length lbs1) (toLBS $ b <> flush <> tooStrictErr)
in lbs1 == lbs2
]

0 comments on commit c0ad4c8

Please sign in to comment.