From 15783462ebbd38930849c565dbfeb5de3e94f057 Mon Sep 17 00:00:00 2001 From: TheMatten Date: Tue, 2 Mar 2021 23:00:06 +0100 Subject: [PATCH 1/6] Add spanEnd and breakEnd to Data.Text --- src/Data/Text.hs | 22 +++++++++++++++++++++- src/Data/Text/Internal/Private.hs | 13 ++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/Data/Text.hs b/src/Data/Text.hs index 86cfd84f..c587cd5f 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -141,7 +141,9 @@ module Data.Text , breakOn , breakOnEnd , break + , breakEnd , span + , spanEnd , group , groupBy , inits @@ -221,7 +223,7 @@ import qualified Data.Text.Internal.Fusion as S import qualified Data.Text.Internal.Fusion.Common as S import Data.Text.Encoding (decodeUtf8', encodeUtf8) import Data.Text.Internal.Fusion (stream, reverseStream, unstream) -import Data.Text.Internal.Private (span_) +import Data.Text.Internal.Private (span_, spanEnd_) import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text) import Data.Text.Show (singleton, unpack, unpackCString#) import qualified Prelude as P @@ -1333,6 +1335,15 @@ span p t = case span_ p t of (# hd,tl #) -> (hd,tl) {-# INLINE span #-} +-- | /O(n)/ Similar to 'span', but searches from the end of the +-- string. +-- +-- >>> T.spanEnd (=='0') "AB000" +-- ("AB", "000") +spanEnd :: (Char -> Bool) -> Text -> (Text, Text) +spanEnd p t = case spanEnd_ p t of (# hd, tl #) -> (hd, tl) +{-# inline spanEnd #-} + -- | /O(n)/ 'break' is like 'span', but the prefix returned is -- over elements that fail the predicate @p@. -- @@ -1342,6 +1353,15 @@ break :: (Char -> Bool) -> Text -> (Text, Text) break p = span (not . p) {-# INLINE break #-} +-- | /O(n)/ Similar to 'break', but searches from the end of the +-- string. +-- +-- >>> T.breakEnd (=='0') "180cm" +-- ("180","cm") +breakEnd :: (Char -> Bool) -> Text -> (Text, Text) +breakEnd p = spanEnd (not . p) +{-# inline breakEnd #-} + -- | /O(n)/ Group characters in a string according to a predicate. groupBy :: (Char -> Char -> Bool) -> Text -> [Text] groupBy p = loop diff --git a/src/Data/Text/Internal/Private.hs b/src/Data/Text/Internal/Private.hs index 7e6fe230..44a99fc2 100644 --- a/src/Data/Text/Internal/Private.hs +++ b/src/Data/Text/Internal/Private.hs @@ -13,11 +13,12 @@ module Data.Text.Internal.Private ( runText , span_ + , spanEnd_ ) where import Control.Monad.ST (ST, runST) import Data.Text.Internal (Text(..), text) -import Data.Text.Unsafe (Iter(..), iter) +import Data.Text.Unsafe (Iter(..), iter, reverseIter) import qualified Data.Text.Array as A span_ :: (Char -> Bool) -> Text -> (# Text, Text #) @@ -30,6 +31,16 @@ span_ p t@(Text arr off len) = (# hd,tl #) where Iter c d = iter t i {-# INLINE span_ #-} +spanEnd_ :: (Char -> Bool) -> Text -> (# Text, Text #) +spanEnd_ p t@(Text arr off len) = (# hd, tl #) + where hd = text arr off (k+1) + tl = text arr (off+k+1) (len-k-1) + !k = loop (len-1) + loop !i | i >= off && p c = loop (i+d) + | otherwise = i + where (c, d) = reverseIter t i +{-# INLINE spanEnd_ #-} + runText :: (forall s. (A.MArray s -> Int -> ST s Text) -> ST s Text) -> Text runText act = runST (act $ \ !marr !len -> do arr <- A.unsafeFreeze marr From 0b7b5c228de123ffff03f9f7177778e85113d3c3 Mon Sep 17 00:00:00 2001 From: TheMatten Date: Wed, 3 Mar 2021 17:41:04 +0100 Subject: [PATCH 2/6] Fix space in example --- src/Data/Text.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Text.hs b/src/Data/Text.hs index c587cd5f..de2f51f2 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -1339,7 +1339,7 @@ span p t = case span_ p t of -- string. -- -- >>> T.spanEnd (=='0') "AB000" --- ("AB", "000") +-- ("AB","000") spanEnd :: (Char -> Bool) -> Text -> (Text, Text) spanEnd p t = case spanEnd_ p t of (# hd, tl #) -> (hd, tl) {-# inline spanEnd #-} From 5d4eafcbc5b121187bb82bd7bfb4a3b59fa6f374 Mon Sep 17 00:00:00 2001 From: TheMatten Date: Sat, 20 Mar 2021 22:02:32 +0100 Subject: [PATCH 3/6] Make formatting consistent --- src/Data/Text.hs | 2 +- src/Data/Text/Internal/Private.hs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Data/Text.hs b/src/Data/Text.hs index de2f51f2..480a2cb0 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -1341,7 +1341,7 @@ span p t = case span_ p t of -- >>> T.spanEnd (=='0') "AB000" -- ("AB","000") spanEnd :: (Char -> Bool) -> Text -> (Text, Text) -spanEnd p t = case spanEnd_ p t of (# hd, tl #) -> (hd, tl) +spanEnd p t = case spanEnd_ p t of (# hd,tl #) -> (hd,tl) {-# inline spanEnd #-} -- | /O(n)/ 'break' is like 'span', but the prefix returned is diff --git a/src/Data/Text/Internal/Private.hs b/src/Data/Text/Internal/Private.hs index 44a99fc2..32faa14c 100644 --- a/src/Data/Text/Internal/Private.hs +++ b/src/Data/Text/Internal/Private.hs @@ -32,13 +32,13 @@ span_ p t@(Text arr off len) = (# hd,tl #) {-# INLINE span_ #-} spanEnd_ :: (Char -> Bool) -> Text -> (# Text, Text #) -spanEnd_ p t@(Text arr off len) = (# hd, tl #) +spanEnd_ p t@(Text arr off len) = (# hd,tl #) where hd = text arr off (k+1) tl = text arr (off+k+1) (len-k-1) !k = loop (len-1) loop !i | i >= off && p c = loop (i+d) | otherwise = i - where (c, d) = reverseIter t i + where (c,d) = reverseIter t i {-# INLINE spanEnd_ #-} runText :: (forall s. (A.MArray s -> Int -> ST s Text) -> ST s Text) -> Text From 1b938563af15e386c326d84b200317d66e4985ba Mon Sep 17 00:00:00 2001 From: TheMatten Date: Sat, 20 Mar 2021 22:03:11 +0100 Subject: [PATCH 4/6] Add lazy variants of breakEnd and spanEnd --- src/Data/Text/Lazy.hs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 606c538b..4ff4008c 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -145,9 +145,11 @@ module Data.Text.Lazy , stripEnd , splitAt , span + , spanEnd , breakOn , breakOnEnd , break + , breakEnd , group , groupBy , inits @@ -1365,6 +1367,15 @@ break p t0 = break' t0 | otherwise -> let (a,b) = T.splitAt n t in (Chunk a Empty, Chunk b ts) +-- | /O(n)/ Similar to 'break', but searches from the end of the string. +-- +-- >>> T.breakEnd (=='0') "180cm" +-- ("180","cm") +breakEnd :: (Char -> Bool) -> Text -> (Text, Text) +breakEnd p src = let (a,b) = break p (reverse src) + in (reverse b, reverse a) +{-# INLINE breakEnd #-} + -- | /O(n)/ 'span', applied to a predicate @p@ and text @t@, returns -- a pair whose first element is the longest prefix (possibly empty) -- of @t@ of elements that satisfy @p@, and whose second is the @@ -1376,6 +1387,14 @@ span :: (Char -> Bool) -> Text -> (Text, Text) span p = break (not . p) {-# INLINE span #-} +-- | /O(n)/ Similar to 'span', but searches from the end of the string. +-- +-- >>> T.spanEnd Data.Char.isAlpha "000AB" +-- ("000","AB") +spanEnd :: (Char -> Bool) -> Text -> (Text, Text) +spanEnd p = breakEnd (not . p) +{-# INLINE spanEnd #-} + -- | The 'group' function takes a 'Text' and returns a list of 'Text's -- such that the concatenation of the result is equal to the argument. -- Moreover, each sublist in the result contains only equal elements. From 982966a7611d108fe3f2c8ebc25960cc7065bbf3 Mon Sep 17 00:00:00 2001 From: TheMatten Date: Thu, 13 May 2021 11:58:54 +0200 Subject: [PATCH 5/6] Fix bounds check, add tests --- src/Data/Text/Internal/Private.hs | 8 ++++---- tests/Tests/Properties/Substrings.hs | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/Data/Text/Internal/Private.hs b/src/Data/Text/Internal/Private.hs index 32faa14c..e7df6363 100644 --- a/src/Data/Text/Internal/Private.hs +++ b/src/Data/Text/Internal/Private.hs @@ -34,11 +34,11 @@ span_ p t@(Text arr off len) = (# hd,tl #) spanEnd_ :: (Char -> Bool) -> Text -> (# Text, Text #) spanEnd_ p t@(Text arr off len) = (# hd,tl #) where hd = text arr off (k+1) - tl = text arr (off+k+1) (len-k-1) + tl = text arr (off+k+1) (len-(k+1)) !k = loop (len-1) - loop !i | i >= off && p c = loop (i+d) - | otherwise = i - where (c,d) = reverseIter t i + loop !i | i >= 0 && p c = loop (i+d) + | otherwise = i + where (c,d) = reverseIter t i {-# INLINE spanEnd_ #-} runText :: (forall s. (A.MArray s -> Int -> ST s Text) -> ST s Text) -> Text diff --git a/tests/Tests/Properties/Substrings.hs b/tests/Tests/Properties/Substrings.hs index 279ad328..392ad88f 100644 --- a/tests/Tests/Properties/Substrings.hs +++ b/tests/Tests/Properties/Substrings.hs @@ -87,8 +87,14 @@ t_strip = T.dropAround isSpace `eq` T.strip tl_strip = TL.dropAround isSpace `eq` TL.strip t_splitAt n = L.splitAt n `eqP` (unpack2 . T.splitAt n) tl_splitAt n = L.splitAt n `eqP` (unpack2 . TL.splitAt (fromIntegral n)) -t_span p = L.span p `eqP` (unpack2 . T.span p) -tl_span p = L.span p `eqP` (unpack2 . TL.span p) +t_span p = L.span p `eqP` (unpack2 . T.span p) +tl_span p = L.span p `eqP` (unpack2 . TL.span p) +t_spanEnd p = spanEnd p `eqP` (unpack2 . T.spanEnd p) +tl_spanEnd p = spanEnd p `eqP` (unpack2 . TL.spanEnd p) + +spanEnd :: (a -> Bool) -> [a] -> ([a], [a]) +spanEnd p l = case span p $ reverse l of + (s, e) -> (reverse e, reverse s) t_breakOn_id s = squid `eq` (uncurry T.append . T.breakOn s) where squid t | T.null s = error "empty" @@ -110,6 +116,13 @@ tl_breakOnEnd_end (NotEmpty s) t = in k `TL.isSuffixOf` t && (TL.null m || s `TL.isSuffixOf` m) t_break p = L.break p `eqP` (unpack2 . T.break p) tl_break p = L.break p `eqP` (unpack2 . TL.break p) +t_breakEnd p = breakEnd p `eqP` (unpack2 . T.breakEnd p) +tl_breakEnd p = breakEnd p `eqP` (unpack2 . TL.breakEnd p) + +breakEnd :: (a -> Bool) -> [a] -> ([a], [a]) +breakEnd p l = case break p $ reverse l of + (s, e) -> (reverse e, reverse s) + t_group = L.group `eqP` (map unpackS . T.group) tl_group = L.group `eqP` (map unpackS . TL.group) t_groupBy p = L.groupBy p `eqP` (map unpackS . T.groupBy p) @@ -275,6 +288,8 @@ testSubstrings = testProperty "tl_splitAt" tl_splitAt, testProperty "t_span" t_span, testProperty "tl_span" tl_span, + testProperty "t_spanEnd" t_spanEnd, + testProperty "tl_spanEnd" tl_spanEnd, testProperty "t_breakOn_id" t_breakOn_id, testProperty "tl_breakOn_id" tl_breakOn_id, testProperty "t_breakOn_start" t_breakOn_start, @@ -283,6 +298,8 @@ testSubstrings = testProperty "tl_breakOnEnd_end" tl_breakOnEnd_end, testProperty "t_break" t_break, testProperty "tl_break" tl_break, + testProperty "t_breakEnd" t_breakEnd, + testProperty "tl_breakEnd" tl_breakEnd, testProperty "t_group" t_group, testProperty "tl_group" tl_group, testProperty "t_groupBy" t_groupBy, From f3c816e2a22ea28128172ea13203e43a74f21ba7 Mon Sep 17 00:00:00 2001 From: TheMatten Date: Fri, 14 May 2021 11:49:29 +0200 Subject: [PATCH 6/6] Do not reverse chunks in lazy breakEnd --- src/Data/Text/Lazy.hs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 4ff4008c..2dd360d9 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -1,5 +1,5 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE BangPatterns, MagicHash, CPP, TypeFamilies #-} +{-# LANGUAGE BangPatterns, MagicHash, CPP, OverloadedStrings, TypeFamilies #-} {-# LANGUAGE Trustworthy #-} {-# LANGUAGE TemplateHaskellQuotes #-} @@ -1372,8 +1372,15 @@ break p t0 = break' t0 -- >>> T.breakEnd (=='0') "180cm" -- ("180","cm") breakEnd :: (Char -> Bool) -> Text -> (Text, Text) -breakEnd p src = let (a,b) = break p (reverse src) - in (reverse b, reverse a) +breakEnd p src = breakEnd' (reverseSpine src) where + reverseSpine = go Empty where + go res Empty = res + go res (Chunk t ts) = go (Chunk t res) ts + breakEnd' = go Empty where + go r Empty = (empty, r) + go r (Chunk t ts) = case T.breakEnd p t of + ("", _) -> go (Chunk t r) ts + (l, r') -> (reverseSpine (Chunk l ts), Chunk r' r) {-# INLINE breakEnd #-} -- | /O(n)/ 'span', applied to a predicate @p@ and text @t@, returns