From aaee912f22bceac8f92961261e4678b0019885be Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Thu, 11 Aug 2022 15:57:08 +0530 Subject: [PATCH 1/2] Create a NonFailing parsers module --- .../Data/Array/Stream/Fold/Foreign.hs | 2 +- .../Streamly/Internal/Data/Parser/ParserD.hs | 1 + .../Data/Parser/ParserD/NonFailing.hs | 213 ++++++++++++++++++ .../Internal/Data/Parser/ParserD/Type.hs | 178 +-------------- core/streamly-core.cabal | 1 + streamly.cabal | 2 + 6 files changed, 221 insertions(+), 176 deletions(-) create mode 100644 core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs diff --git a/core/src/Streamly/Internal/Data/Array/Stream/Fold/Foreign.hs b/core/src/Streamly/Internal/Data/Array/Stream/Fold/Foreign.hs index b8a7499b54..a274df6b99 100644 --- a/core/src/Streamly/Internal/Data/Array/Stream/Fold/Foreign.hs +++ b/core/src/Streamly/Internal/Data/Array/Stream/Fold/Foreign.hs @@ -70,7 +70,7 @@ import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) import qualified Streamly.Internal.Data.Array.Unboxed as Array import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Parser.ParserD as ParserD -import qualified Streamly.Internal.Data.Parser.ParserD.Type as ParserD +import qualified Streamly.Internal.Data.Parser.ParserD.NonFailing as ParserD import qualified Streamly.Internal.Data.Parser as Parser import Prelude hiding (concatMap, take) diff --git a/core/src/Streamly/Internal/Data/Parser/ParserD.hs b/core/src/Streamly/Internal/Data/Parser/ParserD.hs index b48c2ea6c3..3134e52818 100644 --- a/core/src/Streamly/Internal/Data/Parser/ParserD.hs +++ b/core/src/Streamly/Internal/Data/Parser/ParserD.hs @@ -214,6 +214,7 @@ import qualified Streamly.Internal.Data.Stream.StreamD.Generate as D import Prelude hiding (any, all, take, takeWhile, sequence, concatMap, maybe, either, span , zip, filter) +import Streamly.Internal.Data.Parser.ParserD.NonFailing import Streamly.Internal.Data.Parser.ParserD.Tee import Streamly.Internal.Data.Parser.ParserD.Type diff --git a/core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs b/core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs new file mode 100644 index 0000000000..96e7c97348 --- /dev/null +++ b/core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs @@ -0,0 +1,213 @@ +-- | +-- Module : Streamly.Internal.Data.Parser.ParserD.NonFailing +-- Copyright : (c) 2020 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- +-- Parsers that can backtrack but never fail. Because they cannot fail they +-- cannot have an alternative instance. This enables us to write more +-- efficient sequential parsers, because we do not need buffering for the +-- failure case. +-- +-- These parsers lie between parsers that can fail and folds. They are more +-- powerful than folds because they add the backtracking capability to folds. +-- However, they are less powerful than parsers that can fail. + +module Streamly.Internal.Data.Parser.ParserD.NonFailing + ( + noErrorUnsafeSplit_ + , noErrorUnsafeSplitWith + , noErrorUnsafeConcatMap + ) +where + +import Control.Monad.Catch (throwM, MonadThrow) +import Streamly.Internal.Data.Parser.ParserD.Type + ( Initial(..), Step(..), Parser(..), SeqParseState(..), SeqAState(..) + , ConcatParseState(..), ParseError(..) + ) + +import Prelude hiding (concatMap, filter) +-- +-- $setup +-- >>> :m +-- >>> :set -package streamly +-- >>> import Control.Applicative ((<|>)) +-- >>> import Prelude hiding (concatMap) +-- >>> import qualified Streamly.Prelude as Stream +-- >>> import qualified Streamly.Internal.Data.Stream.IsStream as Stream (parse) +-- >>> import qualified Streamly.Internal.Data.Parser as Parser +-- >>> import qualified Streamly.Internal.Data.Parser.ParserD as ParserD + +-- | Works correctly only if the first parser is guaranteed to never fail. +{-# INLINE noErrorUnsafeSplitWith #-} +noErrorUnsafeSplitWith :: Monad m + => (a -> b -> c) -> Parser m x a -> Parser m x b -> Parser m x c +noErrorUnsafeSplitWith func (Parser stepL initialL extractL) + (Parser stepR initialR extractR) = + Parser step initial extract + + where + + initial = do + resL <- initialL + case resL of + IPartial sl -> return $ IPartial $ SeqParseL sl + IDone bl -> do + resR <- initialR + return $ case resR of + IPartial sr -> IPartial $ SeqParseR (func bl) sr + IDone br -> IDone (func bl br) + IError err -> IError err + IError err -> return $ IError err + + -- Note: For the composed parse to terminate, the left parser has to be + -- a terminating parser returning a Done at some point. + step (SeqParseL st) a = do + r <- stepL st a + case r of + -- Assume that the first parser can never fail, therefore we do not + -- need to keep the input for backtracking. + Partial n s -> return $ Partial n (SeqParseL s) + Continue n s -> return $ Continue n (SeqParseL s) + Done n b -> do + res <- initialR + return + $ case res of + IPartial sr -> Partial n $ SeqParseR (func b) sr + IDone br -> Done n (func b br) + IError err -> Error err + Error err -> return $ Error err + + step (SeqParseR f st) a = do + r <- stepR st a + return $ case r of + Partial n s -> Partial n (SeqParseR f s) + Continue n s -> Continue n (SeqParseR f s) + Done n b -> Done n (f b) + Error err -> Error err + + extract (SeqParseR f sR) = fmap f (extractR sR) + extract (SeqParseL sL) = do + rL <- extractL sL + res <- initialR + case res of + IPartial sR -> do + rR <- extractR sR + return $ func rL rR + IDone rR -> return $ func rL rR + IError err -> error $ "noErrorUnsafeSplitWith: cannot use a " + ++ "failing parser. Parser failed with: " ++ err + +{-# INLINE noErrorUnsafeSplit_ #-} +noErrorUnsafeSplit_ :: MonadThrow m => Parser m x a -> Parser m x b -> Parser m x b +noErrorUnsafeSplit_ (Parser stepL initialL extractL) (Parser stepR initialR extractR) = + Parser step initial extract + + where + + initial = do + resL <- initialL + case resL of + IPartial sl -> return $ IPartial $ SeqAL sl + IDone _ -> do + resR <- initialR + return $ case resR of + IPartial sr -> IPartial $ SeqAR sr + IDone br -> IDone br + IError err -> IError err + IError err -> return $ IError err + + -- Note: For the composed parse to terminate, the left parser has to be + -- a terminating parser returning a Done at some point. + step (SeqAL st) a = do + -- Important: Please do not use Applicative here. Applicative somehow + -- caused the next action to run many times in the "tar" parsing code, + -- not sure why though. + resL <- stepL st a + case resL of + Partial n s -> return $ Partial n (SeqAL s) + Continue n s -> return $ Continue n (SeqAL s) + Done n _ -> do + initR <- initialR + return $ case initR of + IPartial s -> Partial n (SeqAR s) + IDone b -> Done n b + IError err -> Error err + Error err -> return $ Error err + + step (SeqAR st) a = + (\case + Partial n s -> Partial n (SeqAR s) + Continue n s -> Continue n (SeqAR s) + Done n b -> Done n b + Error err -> Error err) <$> stepR st a + + extract (SeqAR sR) = extractR sR + extract (SeqAL sL) = do + _ <- extractL sL + res <- initialR + case res of + IPartial sR -> extractR sR + IDone rR -> return rR + IError err -> throwM $ ParseError err + +{-# INLINE noErrorUnsafeConcatMap #-} +noErrorUnsafeConcatMap :: MonadThrow m => + (b -> Parser m a c) -> Parser m a b -> Parser m a c +noErrorUnsafeConcatMap func (Parser stepL initialL extractL) = + Parser step initial extract + + where + + {-# INLINE initializeR #-} + initializeR (Parser stepR initialR extractR) = do + resR <- initialR + return $ case resR of + IPartial sr -> IPartial $ ConcatParseR stepR sr extractR + IDone br -> IDone br + IError err -> IError err + + initial = do + res <- initialL + case res of + IPartial s -> return $ IPartial $ ConcatParseL s + IDone b -> initializeR (func b) + IError err -> return $ IError err + + {-# INLINE initializeRL #-} + initializeRL n (Parser stepR initialR extractR) = do + resR <- initialR + return $ case resR of + IPartial sr -> Partial n $ ConcatParseR stepR sr extractR + IDone br -> Done n br + IError err -> Error err + + step (ConcatParseL st) a = do + r <- stepL st a + case r of + Partial n s -> return $ Partial n (ConcatParseL s) + Continue n s -> return $ Continue n (ConcatParseL s) + Done n b -> initializeRL n (func b) + Error err -> return $ Error err + + step (ConcatParseR stepR st extractR) a = do + r <- stepR st a + return $ case r of + Partial n s -> Partial n $ ConcatParseR stepR s extractR + Continue n s -> Continue n $ ConcatParseR stepR s extractR + Done n b -> Done n b + Error err -> Error err + + {-# INLINE extractP #-} + extractP (Parser _ initialR extractR) = do + res <- initialR + case res of + IPartial s -> extractR s + IDone b -> return b + IError err -> throwM $ ParseError err + + extract (ConcatParseR _ s extractR) = extractR s + extract (ConcatParseL sL) = extractL sL >>= extractP . func diff --git a/core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs b/core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs index 2000140839..93bae9ab48 100644 --- a/core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs +++ b/core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs @@ -185,7 +185,9 @@ module Streamly.Internal.Data.Parser.ParserD.Type , fromPure , fromEffect + , SeqParseState(..) , serialWith + , SeqAState(..) , split_ , die @@ -194,16 +196,13 @@ module Streamly.Internal.Data.Parser.ParserD.Type , splitMany -- parseMany? , splitManyPost , alt + , ConcatParseState (..) , concatMap -- * Input transformation , lmap , lmapM , filter - - , noErrorUnsafeSplit_ - , noErrorUnsafeSplitWith - , noErrorUnsafeConcatMap ) where @@ -746,66 +745,6 @@ serialWith func (Parser stepL initialL extractL) IDone rR -> return $ func rL rR IError err -> throwM $ ParseError err --- | Works correctly only if the first parser is guaranteed to never fail. -{-# INLINE noErrorUnsafeSplitWith #-} -noErrorUnsafeSplitWith :: Monad m - => (a -> b -> c) -> Parser m x a -> Parser m x b -> Parser m x c -noErrorUnsafeSplitWith func (Parser stepL initialL extractL) - (Parser stepR initialR extractR) = - Parser step initial extract - - where - - initial = do - resL <- initialL - case resL of - IPartial sl -> return $ IPartial $ SeqParseL sl - IDone bl -> do - resR <- initialR - return $ case resR of - IPartial sr -> IPartial $ SeqParseR (func bl) sr - IDone br -> IDone (func bl br) - IError err -> IError err - IError err -> return $ IError err - - -- Note: For the composed parse to terminate, the left parser has to be - -- a terminating parser returning a Done at some point. - step (SeqParseL st) a = do - r <- stepL st a - case r of - -- Assume that the first parser can never fail, therefore we do not - -- need to keep the input for backtracking. - Partial n s -> return $ Partial n (SeqParseL s) - Continue n s -> return $ Continue n (SeqParseL s) - Done n b -> do - res <- initialR - return - $ case res of - IPartial sr -> Partial n $ SeqParseR (func b) sr - IDone br -> Done n (func b br) - IError err -> Error err - Error err -> return $ Error err - - step (SeqParseR f st) a = do - r <- stepR st a - return $ case r of - Partial n s -> Partial n (SeqParseR f s) - Continue n s -> Continue n (SeqParseR f s) - Done n b -> Done n (f b) - Error err -> Error err - - extract (SeqParseR f sR) = fmap f (extractR sR) - extract (SeqParseL sL) = do - rL <- extractL sL - res <- initialR - case res of - IPartial sR -> do - rR <- extractR sR - return $ func rL rR - IDone rR -> return $ func rL rR - IError err -> error $ "noErrorUnsafeSplitWith: cannot use a " - ++ "failing parser. Parser failed with: " ++ err - {-# ANN type SeqAState Fuse #-} data SeqAState sl sr = SeqAL sl | SeqAR sr @@ -868,59 +807,6 @@ split_ (Parser stepL initialL extractL) (Parser stepR initialR extractR) = IDone rR -> return rR IError err -> throwM $ ParseError err -{-# INLINE noErrorUnsafeSplit_ #-} -noErrorUnsafeSplit_ :: MonadThrow m => Parser m x a -> Parser m x b -> Parser m x b -noErrorUnsafeSplit_ (Parser stepL initialL extractL) (Parser stepR initialR extractR) = - Parser step initial extract - - where - - initial = do - resL <- initialL - case resL of - IPartial sl -> return $ IPartial $ SeqAL sl - IDone _ -> do - resR <- initialR - return $ case resR of - IPartial sr -> IPartial $ SeqAR sr - IDone br -> IDone br - IError err -> IError err - IError err -> return $ IError err - - -- Note: For the composed parse to terminate, the left parser has to be - -- a terminating parser returning a Done at some point. - step (SeqAL st) a = do - -- Important: Please do not use Applicative here. Applicative somehow - -- caused the next action to run many times in the "tar" parsing code, - -- not sure why though. - resL <- stepL st a - case resL of - Partial n s -> return $ Partial n (SeqAL s) - Continue n s -> return $ Continue n (SeqAL s) - Done n _ -> do - initR <- initialR - return $ case initR of - IPartial s -> Partial n (SeqAR s) - IDone b -> Done n b - IError err -> Error err - Error err -> return $ Error err - - step (SeqAR st) a = - (\case - Partial n s -> Partial n (SeqAR s) - Continue n s -> Continue n (SeqAR s) - Done n b -> Done n b - Error err -> Error err) <$> stepR st a - - extract (SeqAR sR) = extractR sR - extract (SeqAL sL) = do - _ <- extractL sL - res <- initialR - case res of - IPartial sR -> extractR sR - IDone rR -> return rR - IError err -> throwM $ ParseError err - -- | 'Applicative' form of 'serialWith'. instance MonadThrow m => Applicative (Parser m a) where {-# INLINE pure #-} @@ -1323,64 +1209,6 @@ concatMap func (Parser stepL initialL extractL) = Parser step initial extract extract (ConcatParseR _ s extractR) = extractR s extract (ConcatParseL sL) = extractL sL >>= extractP . func -{-# INLINE noErrorUnsafeConcatMap #-} -noErrorUnsafeConcatMap :: MonadThrow m => - (b -> Parser m a c) -> Parser m a b -> Parser m a c -noErrorUnsafeConcatMap func (Parser stepL initialL extractL) = - Parser step initial extract - - where - - {-# INLINE initializeR #-} - initializeR (Parser stepR initialR extractR) = do - resR <- initialR - return $ case resR of - IPartial sr -> IPartial $ ConcatParseR stepR sr extractR - IDone br -> IDone br - IError err -> IError err - - initial = do - res <- initialL - case res of - IPartial s -> return $ IPartial $ ConcatParseL s - IDone b -> initializeR (func b) - IError err -> return $ IError err - - {-# INLINE initializeRL #-} - initializeRL n (Parser stepR initialR extractR) = do - resR <- initialR - return $ case resR of - IPartial sr -> Partial n $ ConcatParseR stepR sr extractR - IDone br -> Done n br - IError err -> Error err - - step (ConcatParseL st) a = do - r <- stepL st a - case r of - Partial n s -> return $ Partial n (ConcatParseL s) - Continue n s -> return $ Continue n (ConcatParseL s) - Done n b -> initializeRL n (func b) - Error err -> return $ Error err - - step (ConcatParseR stepR st extractR) a = do - r <- stepR st a - return $ case r of - Partial n s -> Partial n $ ConcatParseR stepR s extractR - Continue n s -> Continue n $ ConcatParseR stepR s extractR - Done n b -> Done n b - Error err -> Error err - - {-# INLINE extractP #-} - extractP (Parser _ initialR extractR) = do - res <- initialR - case res of - IPartial s -> extractR s - IDone b -> return b - IError err -> throwM $ ParseError err - - extract (ConcatParseR _ s extractR) = extractR s - extract (ConcatParseL sL) = extractL sL >>= extractP . func - -- Note: The monad instance has quadratic performance complexity. It works fine -- for small number of compositions but for a scalable implementation we need a -- CPS version. diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index 0d9909238f..6e58ee1586 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -274,6 +274,7 @@ library , Streamly.Internal.Data.Producer.Source , Streamly.Internal.Data.Parser.ParserK.Type , Streamly.Internal.Data.Parser.ParserD.Type + , Streamly.Internal.Data.Parser.ParserD.NonFailing , Streamly.Internal.Data.Pipe.Type -- streamly-core-array-types diff --git a/streamly.cabal b/streamly.cabal index 11f9f1112b..daa645ab89 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -160,6 +160,7 @@ extra-source-files: core/src/Streamly/Internal/Data/Producer.hs core/src/Streamly/Internal/Data/Producer/Source.hs core/src/Streamly/Internal/Data/Parser/ParserK/Type.hs + core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs core/src/Streamly/Internal/Data/Pipe/Type.hs core/src/Streamly/Internal/Data/Unboxed.hs @@ -567,6 +568,7 @@ library , Streamly.Internal.Data.Producer.Source , Streamly.Internal.Data.Parser.ParserK.Type + , Streamly.Internal.Data.Parser.ParserD.NonFailing , Streamly.Internal.Data.Parser.ParserD.Type , Streamly.Internal.Data.Pipe.Type From 69f13434b3038d334c201738a57ba7ed3f3577ed Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Sat, 13 Aug 2022 20:05:26 +0530 Subject: [PATCH 2/2] Move non-failing parsers to the NonFailing module --- .../Streamly/Internal/Data/Parser/ParserD.hs | 319 +---------------- .../Data/Parser/ParserD/NonFailing.hs | 337 +++++++++++++++++- 2 files changed, 338 insertions(+), 318 deletions(-) diff --git a/core/src/Streamly/Internal/Data/Parser/ParserD.hs b/core/src/Streamly/Internal/Data/Parser/ParserD.hs index 3134e52818..ed03424b4f 100644 --- a/core/src/Streamly/Internal/Data/Parser/ParserD.hs +++ b/core/src/Streamly/Internal/Data/Parser/ParserD.hs @@ -274,30 +274,7 @@ toFold (Parser pstep pinitial pextract) = Fold step initial pextract ------------------------------------------------------------------------------- -- Upgrade folds to parses ------------------------------------------------------------------------------- --- --- | See 'Streamly.Internal.Data.Parser.fromFold'. --- --- /Pre-release/ --- -{-# INLINE fromFold #-} -fromFold :: Monad m => Fold m a b -> Parser m a b -fromFold (Fold fstep finitial fextract) = Parser step initial fextract - - where - - initial = do - res <- finitial - return - $ case res of - FL.Partial s1 -> IPartial s1 - FL.Done b -> IDone b - - step s a = do - res <- fstep s a - return - $ case res of - FL.Partial s1 -> Partial 0 s1 - FL.Done b -> Done 0 b +-- XXX fromMaybeFold -- | Convert Maybe returning folds to error returning parsers. -- @@ -369,18 +346,6 @@ eof = Parser step initial return step () _ = return $ Error "eof: not at end of input" --- | See 'Streamly.Internal.Data.Parser.next'. --- --- /Pre-release/ --- -{-# INLINE next #-} -next :: Monad m => Parser m a (Maybe a) -next = Parser step initial extract - where - initial = pure $ IPartial () - step _ a = pure $ Done 0 (Just a) - extract _ = pure Nothing - -- | See 'Streamly.Internal.Data.Parser.either'. -- -- /Pre-release/ @@ -654,33 +619,6 @@ takeWhileP predicate (Parser pstep pinitial pextract) = then pstep s a else Done 1 <$> pextract s --- | See 'Streamly.Internal.Data.Parser.takeWhile'. --- --- /Pre-release/ --- -{-# INLINE takeWhile #-} -takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser m a b -takeWhile predicate (Fold fstep finitial fextract) = - Parser step initial fextract - - where - - initial = do - res <- finitial - return $ case res of - FL.Partial s -> IPartial s - FL.Done b -> IDone b - - step s a = - if predicate a - then do - fres <- fstep s a - return - $ case fres of - FL.Partial s1 -> Partial 0 s1 - FL.Done b -> Done 0 b - else Done 1 <$> fextract s - -- | See 'Streamly.Internal.Data.Parser.takeWhile1'. -- -- /Pre-release/ @@ -1037,52 +975,6 @@ takeFramedBy_ isBegin isEnd (Fold fstep finitial fextract) = -- Grouping and words ------------------------------------------------------------------------------- -data WordByState s b = WBLeft !s | WBWord !s | WBRight !b - --- | See 'Streamly.Internal.Data.Parser.wordBy'. --- --- -{-# INLINE wordBy #-} -wordBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser m a b -wordBy predicate (Fold fstep finitial fextract) = Parser step initial extract - - where - - {-# INLINE worder #-} - worder s a = do - res <- fstep s a - return - $ case res of - FL.Partial s1 -> Partial 0 $ WBWord s1 - FL.Done b -> Done 0 b - - initial = do - res <- finitial - return - $ case res of - FL.Partial s -> IPartial $ WBLeft s - FL.Done b -> IDone b - - step (WBLeft s) a = - if not (predicate a) - then worder s a - else return $ Partial 0 $ WBLeft s - step (WBWord s) a = - if not (predicate a) - then worder s a - else do - b <- fextract s - return $ Partial 0 $ WBRight b - step (WBRight b) a = - return - $ if not (predicate a) - then Done 1 b - else Partial 0 $ WBRight b - - extract (WBLeft s) = fextract s - extract (WBWord s) = fextract s - extract (WBRight b) = return b - data WordFramedState s b = WordFramedSkipPre !s | WordFramedWord !s !Int @@ -1283,163 +1175,6 @@ wordQuotedBy keepQuotes isEsc isBegin isEnd toRight isSep err "wordQuotedBy: trailing escape" extract (WordQuotedSkipPost b) = return b -{-# ANN type GroupByState Fuse #-} -data GroupByState a s - = GroupByInit !s - | GroupByGrouping !a !s - --- | See 'Streamly.Internal.Data.Parser.groupBy'. --- -{-# INLINE groupBy #-} -groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser m a b -groupBy eq (Fold fstep finitial fextract) = Parser step initial extract - - where - - {-# INLINE grouper #-} - grouper s a0 a = do - res <- fstep s a - return - $ case res of - FL.Done b -> Done 0 b - FL.Partial s1 -> Partial 0 (GroupByGrouping a0 s1) - - initial = do - res <- finitial - return - $ case res of - FL.Partial s -> IPartial $ GroupByInit s - FL.Done b -> IDone b - - step (GroupByInit s) a = grouper s a a - step (GroupByGrouping a0 s) a = - if eq a0 a - then grouper s a0 a - else Done 1 <$> fextract s - - extract (GroupByInit s) = fextract s - extract (GroupByGrouping _ s) = fextract s - --- | See 'Streamly.Internal.Data.Parser.groupByRolling'. --- -{-# INLINE groupByRolling #-} -groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser m a b -groupByRolling eq (Fold fstep finitial fextract) = Parser step initial extract - - where - - {-# INLINE grouper #-} - grouper s a = do - res <- fstep s a - return - $ case res of - FL.Done b -> Done 0 b - FL.Partial s1 -> Partial 0 (GroupByGrouping a s1) - - initial = do - res <- finitial - return - $ case res of - FL.Partial s -> IPartial $ GroupByInit s - FL.Done b -> IDone b - - step (GroupByInit s) a = grouper s a - step (GroupByGrouping a0 s) a = - if eq a0 a - then grouper s a - else Done 1 <$> fextract s - - extract (GroupByInit s) = fextract s - extract (GroupByGrouping _ s) = fextract s - -{-# ANN type GroupByStatePair Fuse #-} -data GroupByStatePair a s1 s2 - = GroupByInitPair !s1 !s2 - | GroupByGroupingPair !a !s1 !s2 - | GroupByGroupingPairL !a !s1 !s2 - | GroupByGroupingPairR !a !s1 !s2 - -{-# INLINE groupByRollingEither #-} -groupByRollingEither :: MonadCatch m => - (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (Either b c) -groupByRollingEither - eq - (Fold fstep1 finitial1 fextract1) - (Fold fstep2 finitial2 fextract2) = Parser step initial extract - - where - - {-# INLINE grouper #-} - grouper s1 s2 a = do - return $ Continue 0 (GroupByGroupingPair a s1 s2) - - {-# INLINE grouperL2 #-} - grouperL2 s1 s2 a = do - res <- fstep1 s1 a - return - $ case res of - FL.Done b -> Done 0 (Left b) - FL.Partial s11 -> Partial 0 (GroupByGroupingPairL a s11 s2) - - {-# INLINE grouperL #-} - grouperL s1 s2 a0 a = do - res <- fstep1 s1 a0 - case res of - FL.Done b -> return $ Done 0 (Left b) - FL.Partial s11 -> grouperL2 s11 s2 a - - {-# INLINE grouperR2 #-} - grouperR2 s1 s2 a = do - res <- fstep2 s2 a - return - $ case res of - FL.Done b -> Done 0 (Right b) - FL.Partial s21 -> Partial 0 (GroupByGroupingPairR a s1 s21) - - {-# INLINE grouperR #-} - grouperR s1 s2 a0 a = do - res <- fstep2 s2 a0 - case res of - FL.Done b -> return $ Done 0 (Right b) - FL.Partial s21 -> grouperR2 s1 s21 a - - initial = do - res1 <- finitial1 - res2 <- finitial2 - return - $ case res1 of - FL.Partial s1 -> - case res2 of - FL.Partial s2 -> IPartial $ GroupByInitPair s1 s2 - FL.Done b -> IDone (Right b) - FL.Done b -> IDone (Left b) - - step (GroupByInitPair s1 s2) a = grouper s1 s2 a - - step (GroupByGroupingPair a0 s1 s2) a = - if not (eq a0 a) - then grouperL s1 s2 a0 a - else grouperR s1 s2 a0 a - - step (GroupByGroupingPairL a0 s1 s2) a = - if not (eq a0 a) - then grouperL2 s1 s2 a - else Done 1 . Left <$> fextract1 s1 - - step (GroupByGroupingPairR a0 s1 s2) a = - if eq a0 a - then grouperR2 s1 s2 a - else Done 1 . Right <$> fextract2 s2 - - extract (GroupByInitPair s1 _) = Left <$> fextract1 s1 - extract (GroupByGroupingPairL _ s1 _) = Left <$> fextract1 s1 - extract (GroupByGroupingPairR _ _ s2) = Right <$> fextract2 s2 - extract (GroupByGroupingPair a s1 _) = do - res <- fstep1 s1 a - case res of - FL.Done b -> return $ Left b - FL.Partial s11 -> Left <$> fextract1 s11 - -- XXX use an Unfold instead of a list? -- XXX custom combinators for matching list, array and stream? -- XXX rename to listBy? @@ -1534,6 +1269,8 @@ postscan :: -- Monad m => Fold m a b -> Parser m b c -> Parser m a c postscan = undefined +-- XXX More variants of this are possible based on how do we end the fold, when +-- the stream ends, when the fold ends, or when any ends. {-# INLINE zipWithM #-} zipWithM :: MonadThrow m => (a -> b -> m c) -> D.Stream m a -> Fold m c x -> Parser m b x @@ -1631,56 +1368,6 @@ sampleFromthen :: MonadThrow m => Int -> Int -> Fold m a b -> Parser m a b sampleFromthen offset size = makeIndexFilter indexed FL.filter (\(i, _) -> (i + offset) `mod` size == 0) --------------------------------------------------------------------------------- ---- Spanning --------------------------------------------------------------------------------- - --- | @span p f1 f2@ composes folds @f1@ and @f2@ such that @f1@ consumes the --- input as long as the predicate @p@ is 'True'. @f2@ consumes the rest of the --- input. --- --- @ --- > let span_ p xs = Stream.parse (Parser.span p Fold.toList Fold.toList) $ Stream.fromList xs --- --- > span_ (< 1) [1,2,3] --- ([],[1,2,3]) --- --- > span_ (< 2) [1,2,3] --- ([1],[2,3]) --- --- > span_ (< 4) [1,2,3] --- ([1,2,3],[]) --- --- @ --- --- /Pre-release/ -{-# INLINE span #-} -span :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c) -span p f1 f2 = noErrorUnsafeSplitWith (,) (takeWhile p f1) (fromFold f2) - --- | Break the input stream into two groups, the first group takes the input as --- long as the predicate applied to the first element of the stream and next --- input element holds 'True', the second group takes the rest of the input. --- --- /Pre-release/ --- -{-# INLINE spanBy #-} -spanBy :: - Monad m - => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c) -spanBy eq f1 f2 = noErrorUnsafeSplitWith (,) (groupBy eq f1) (fromFold f2) - --- | Like 'spanBy' but applies the predicate in a rolling fashion i.e. --- predicate is applied to the previous and the next input elements. --- --- /Pre-release/ -{-# INLINE spanByRolling #-} -spanByRolling :: - Monad m - => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c) -spanByRolling eq f1 f2 = - noErrorUnsafeSplitWith (,) (groupByRolling eq f1) (fromFold f2) - ------------------------------------------------------------------------------- -- nested parsers ------------------------------------------------------------------------------- diff --git a/core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs b/core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs index 96e7c97348..62e080351a 100644 --- a/core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs +++ b/core/src/Streamly/Internal/Data/Parser/ParserD/NonFailing.hs @@ -13,23 +13,39 @@ -- -- These parsers lie between parsers that can fail and folds. They are more -- powerful than folds because they add the backtracking capability to folds. --- However, they are less powerful than parsers that can fail. +-- However, they are less powerful than regular parsers because these cannot +-- fail. module Streamly.Internal.Data.Parser.ParserD.NonFailing ( noErrorUnsafeSplit_ , noErrorUnsafeSplitWith , noErrorUnsafeConcatMap + + , fromFold + , next + , takeWhile + , wordBy + , groupBy + , groupByRolling + , groupByRollingEither + , span + , spanBy + , spanByRolling ) where import Control.Monad.Catch (throwM, MonadThrow) +import Fusion.Plugin.Types (Fuse(..)) +import Streamly.Internal.Data.Fold.Type (Fold(..)) import Streamly.Internal.Data.Parser.ParserD.Type ( Initial(..), Step(..), Parser(..), SeqParseState(..), SeqAState(..) , ConcatParseState(..), ParseError(..) ) +import qualified Streamly.Internal.Data.Fold.Type as FL + +import Prelude hiding (concatMap, filter, takeWhile, span) -import Prelude hiding (concatMap, filter) -- -- $setup -- >>> :m @@ -211,3 +227,320 @@ noErrorUnsafeConcatMap func (Parser stepL initialL extractL) = extract (ConcatParseR _ s extractR) = extractR s extract (ConcatParseL sL) = extractL sL >>= extractP . func + +-- +-- | See 'Streamly.Internal.Data.Parser.fromFold'. +-- +-- /Pre-release/ +-- +{-# INLINE fromFold #-} +fromFold :: Monad m => Fold m a b -> Parser m a b +fromFold (Fold fstep finitial fextract) = Parser step initial fextract + + where + + initial = do + res <- finitial + return + $ case res of + FL.Partial s1 -> IPartial s1 + FL.Done b -> IDone b + + step s a = do + res <- fstep s a + return + $ case res of + FL.Partial s1 -> Partial 0 s1 + FL.Done b -> Done 0 b + +-- | See 'Streamly.Internal.Data.Parser.next'. +-- +-- /Pre-release/ +-- +{-# INLINE next #-} +next :: Monad m => Parser m a (Maybe a) +next = Parser step initial extract + where + initial = pure $ IPartial () + step _ a = pure $ Done 0 (Just a) + extract _ = pure Nothing + +-- | See 'Streamly.Internal.Data.Parser.takeWhile'. +-- +-- /Pre-release/ +-- +{-# INLINE takeWhile #-} +takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser m a b +takeWhile predicate (Fold fstep finitial fextract) = + Parser step initial fextract + + where + + initial = do + res <- finitial + return $ case res of + FL.Partial s -> IPartial s + FL.Done b -> IDone b + + step s a = + if predicate a + then do + fres <- fstep s a + return + $ case fres of + FL.Partial s1 -> Partial 0 s1 + FL.Done b -> Done 0 b + else Done 1 <$> fextract s + +data WordByState s b = WBLeft !s | WBWord !s | WBRight !b + +-- | See 'Streamly.Internal.Data.Parser.wordBy'. +-- +-- +{-# INLINE wordBy #-} +wordBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser m a b +wordBy predicate (Fold fstep finitial fextract) = Parser step initial extract + + where + + {-# INLINE worder #-} + worder s a = do + res <- fstep s a + return + $ case res of + FL.Partial s1 -> Partial 0 $ WBWord s1 + FL.Done b -> Done 0 b + + initial = do + res <- finitial + return + $ case res of + FL.Partial s -> IPartial $ WBLeft s + FL.Done b -> IDone b + + step (WBLeft s) a = + if not (predicate a) + then worder s a + else return $ Partial 0 $ WBLeft s + step (WBWord s) a = + if not (predicate a) + then worder s a + else do + b <- fextract s + return $ Partial 0 $ WBRight b + step (WBRight b) a = + return + $ if not (predicate a) + then Done 1 b + else Partial 0 $ WBRight b + + extract (WBLeft s) = fextract s + extract (WBWord s) = fextract s + extract (WBRight b) = return b + +{-# ANN type GroupByState Fuse #-} +data GroupByState a s + = GroupByInit !s + | GroupByGrouping !a !s + +-- | See 'Streamly.Internal.Data.Parser.groupBy'. +-- +{-# INLINE groupBy #-} +groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser m a b +groupBy eq (Fold fstep finitial fextract) = Parser step initial extract + + where + + {-# INLINE grouper #-} + grouper s a0 a = do + res <- fstep s a + return + $ case res of + FL.Done b -> Done 0 b + FL.Partial s1 -> Partial 0 (GroupByGrouping a0 s1) + + initial = do + res <- finitial + return + $ case res of + FL.Partial s -> IPartial $ GroupByInit s + FL.Done b -> IDone b + + step (GroupByInit s) a = grouper s a a + step (GroupByGrouping a0 s) a = + if eq a0 a + then grouper s a0 a + else Done 1 <$> fextract s + + extract (GroupByInit s) = fextract s + extract (GroupByGrouping _ s) = fextract s + +-- | See 'Streamly.Internal.Data.Parser.groupByRolling'. +-- +{-# INLINE groupByRolling #-} +groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser m a b +groupByRolling eq (Fold fstep finitial fextract) = Parser step initial extract + + where + + {-# INLINE grouper #-} + grouper s a = do + res <- fstep s a + return + $ case res of + FL.Done b -> Done 0 b + FL.Partial s1 -> Partial 0 (GroupByGrouping a s1) + + initial = do + res <- finitial + return + $ case res of + FL.Partial s -> IPartial $ GroupByInit s + FL.Done b -> IDone b + + step (GroupByInit s) a = grouper s a + step (GroupByGrouping a0 s) a = + if eq a0 a + then grouper s a + else Done 1 <$> fextract s + + extract (GroupByInit s) = fextract s + extract (GroupByGrouping _ s) = fextract s + +{-# ANN type GroupByStatePair Fuse #-} +data GroupByStatePair a s1 s2 + = GroupByInitPair !s1 !s2 + | GroupByGroupingPair !a !s1 !s2 + | GroupByGroupingPairL !a !s1 !s2 + | GroupByGroupingPairR !a !s1 !s2 + +{-# INLINE groupByRollingEither #-} +groupByRollingEither :: Monad m => + (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (Either b c) +groupByRollingEither + eq + (Fold fstep1 finitial1 fextract1) + (Fold fstep2 finitial2 fextract2) = Parser step initial extract + + where + + {-# INLINE grouper #-} + grouper s1 s2 a = do + return $ Continue 0 (GroupByGroupingPair a s1 s2) + + {-# INLINE grouperL2 #-} + grouperL2 s1 s2 a = do + res <- fstep1 s1 a + return + $ case res of + FL.Done b -> Done 0 (Left b) + FL.Partial s11 -> Partial 0 (GroupByGroupingPairL a s11 s2) + + {-# INLINE grouperL #-} + grouperL s1 s2 a0 a = do + res <- fstep1 s1 a0 + case res of + FL.Done b -> return $ Done 0 (Left b) + FL.Partial s11 -> grouperL2 s11 s2 a + + {-# INLINE grouperR2 #-} + grouperR2 s1 s2 a = do + res <- fstep2 s2 a + return + $ case res of + FL.Done b -> Done 0 (Right b) + FL.Partial s21 -> Partial 0 (GroupByGroupingPairR a s1 s21) + + {-# INLINE grouperR #-} + grouperR s1 s2 a0 a = do + res <- fstep2 s2 a0 + case res of + FL.Done b -> return $ Done 0 (Right b) + FL.Partial s21 -> grouperR2 s1 s21 a + + initial = do + res1 <- finitial1 + res2 <- finitial2 + return + $ case res1 of + FL.Partial s1 -> + case res2 of + FL.Partial s2 -> IPartial $ GroupByInitPair s1 s2 + FL.Done b -> IDone (Right b) + FL.Done b -> IDone (Left b) + + step (GroupByInitPair s1 s2) a = grouper s1 s2 a + + step (GroupByGroupingPair a0 s1 s2) a = + if not (eq a0 a) + then grouperL s1 s2 a0 a + else grouperR s1 s2 a0 a + + step (GroupByGroupingPairL a0 s1 s2) a = + if not (eq a0 a) + then grouperL2 s1 s2 a + else Done 1 . Left <$> fextract1 s1 + + step (GroupByGroupingPairR a0 s1 s2) a = + if eq a0 a + then grouperR2 s1 s2 a + else Done 1 . Right <$> fextract2 s2 + + extract (GroupByInitPair s1 _) = Left <$> fextract1 s1 + extract (GroupByGroupingPairL _ s1 _) = Left <$> fextract1 s1 + extract (GroupByGroupingPairR _ _ s2) = Right <$> fextract2 s2 + extract (GroupByGroupingPair a s1 _) = do + res <- fstep1 s1 a + case res of + FL.Done b -> return $ Left b + FL.Partial s11 -> Left <$> fextract1 s11 + +-------------------------------------------------------------------------------- +--- Spanning +-------------------------------------------------------------------------------- + +-- | @span p f1 f2@ composes folds @f1@ and @f2@ such that @f1@ consumes the +-- input as long as the predicate @p@ is 'True'. @f2@ consumes the rest of the +-- input. +-- +-- @ +-- > let span_ p xs = Stream.parse (Parser.span p Fold.toList Fold.toList) $ Stream.fromList xs +-- +-- > span_ (< 1) [1,2,3] +-- ([],[1,2,3]) +-- +-- > span_ (< 2) [1,2,3] +-- ([1],[2,3]) +-- +-- > span_ (< 4) [1,2,3] +-- ([1,2,3],[]) +-- +-- @ +-- +-- /Pre-release/ +{-# INLINE span #-} +span :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c) +span p f1 f2 = noErrorUnsafeSplitWith (,) (takeWhile p f1) (fromFold f2) + +-- | Break the input stream into two groups, the first group takes the input as +-- long as the predicate applied to the first element of the stream and next +-- input element holds 'True', the second group takes the rest of the input. +-- +-- /Pre-release/ +-- +{-# INLINE spanBy #-} +spanBy :: + Monad m + => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c) +spanBy eq f1 f2 = noErrorUnsafeSplitWith (,) (groupBy eq f1) (fromFold f2) + +-- | Like 'spanBy' but applies the predicate in a rolling fashion i.e. +-- predicate is applied to the previous and the next input elements. +-- +-- /Pre-release/ +{-# INLINE spanByRolling #-} +spanByRolling :: + Monad m + => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c) +spanByRolling eq f1 f2 = + noErrorUnsafeSplitWith (,) (groupByRolling eq f1) (fromFold f2)