From 344e6f0f99389af3c4b63239ef09d98b9e1a9c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Wed, 15 Apr 2026 15:35:42 -0600 Subject: [PATCH 1/9] Add and fix haddocks in `DiffContext` and `DiffOutput` --- src/Data/Algorithm/DiffContext.hs | 52 ++++++++++++++++++------- src/Data/Algorithm/DiffOutput.hs | 65 ++++++++++++++++++++++++------- 2 files changed, 91 insertions(+), 26 deletions(-) diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs index ac6761c..f1336fc 100644 --- a/src/Data/Algorithm/DiffContext.hs +++ b/src/Data/Algorithm/DiffContext.hs @@ -8,7 +8,7 @@ -- Portability : portable -- Author : David Fox (ddssff at the email service from google) -- --- Generates a grouped diff with merged runs, and outputs them in the manner of diff -u +-- Generates a grouped diff with merged runs, and outputs them in the manner of @diff -u@. ----------------------------------------------------------------------------- module Data.Algorithm.DiffContext ( ContextDiff, Hunk @@ -22,19 +22,23 @@ module Data.Algorithm.DiffContext ) where import Data.Algorithm.Diff (PolyDiff(..), Diff, getGroupedDiff) --- import Data.List (groupBy) import Data.Bifunctor import Text.PrettyPrint (Doc, text, empty, hcat) +-- | A diff consisting of disjoint 'Hunk's. type ContextDiff c = [Hunk c] + +-- | A 'Hunk' is a list of adjacent 'Diff's. type Hunk c = [Diff [c]] --- | A version of 'groupBy' that does not assume the argument function --- is transitive. This is used to partition the 'Diff' list into --- segments that begin and end with matching ('Both') text, with and --- have non-matching ('First' and 'Second') text in the middle. + +-- | Groups elements so that consecutive elements in a group satisfy the predicate. +-- This is unlike 'Data.List.groupBy' where grouped elements are only guaranteed to +-- satisfy the predicate w.r.t. the first element of the group. +-- +-- For instance, to split the input where there are two consecutive `1`s: -- --- > let notBoth1 a b = not (a == 1 || b == 1) in +-- > let notBoth1 a b = not (a == 1 && b == 1) in -- > -- > groupBy' notBoth1 [1,1,2,3,1,1,4,5,6,1] -- > [[1],[1,2,3,1],[1,4,5,6,1]] @@ -83,7 +87,7 @@ unnumber (Numbered _ a) = a -- > -k getContextDiff :: Eq a - => Maybe Int -- ^ Number of context elements, Nothing means infinite + => Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'. -> [a] -> [a] -> ContextDiff (Numbered a) @@ -91,38 +95,60 @@ getContextDiff context a b = getContextDiffNumbered context (numbered a) (numbered b) -- | If for some reason you need the line numbers stripped from the --- result of getContextDiff for backwards compatibility. +-- result of 'getContextDiff' for backwards compatibility. unNumberContextDiff :: ContextDiff (Numbered a) -> ContextDiff a unNumberContextDiff = fmap (fmap (bimap (fmap unnumber) (fmap unnumber))) +-- | Create a diff made of separate 'Hunk's by reducing the lists of common +-- elements surrounding each sequence of differing elements to the specified +-- @context@ number. Adjancent hunks end up merged if the list of common elements +-- between them is shorter than twice the @context@. +-- If @context@ is 'Nothing', we get a single hunk with the whole diff. getContextDiffNumbered :: Eq a - => Maybe Int -- ^ Number of context elements, Nothing means infinite + => Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'. -> [Numbered a] -> [Numbered a] -> ContextDiff (Numbered a) getContextDiffNumbered context a0 b0 = + -- The 'Diff' list is grouped into 'Hunks' that begin and end + -- with matching ('Both') text, having non-matching ('First' and 'Second') + -- text in the middle. Note that a non-trivial partition can only happen after + -- the matching text has been reduced to become consecutive 'Both' values + -- corresponding to a hunk's suffix and the following hunk prefix. groupBy' (\a b -> not (isBoth a && isBoth b)) $ doPrefix $ getGroupedDiff a0 b0 where isBoth (Both _ _) = True isBoth _ = False - -- Handle the common text leading up to a diff. + -- | Handle the common text leading up to a diff. + doPrefix :: Hunk a -> Hunk a doPrefix [] = [] + -- Trailing common elements are no prefix. doPrefix [Both _ _] = [] + -- Do the prefix proper. doPrefix (Both xs ys : more) = Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) context) (maybe ys (\n -> drop (max 0 (length ys - n)) ys) context) : doSuffix more - -- Prefix finished, do the diff then the following suffix + -- Prefix finished, do the diff then the following suffix. doPrefix (d : ds) = doSuffix (d : ds) - -- Handle the common text following a diff. + -- | Handle the common text following a diff. + doSuffix :: Hunk a -> Hunk a doSuffix [] = [] + -- A trailing suffix. doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)] + -- Infinite context or common text too short to split. doSuffix (Both xs ys : more) | maybe True (\n -> length xs <= n * 2) context = Both xs ys : doPrefix more + -- If the common text long enough, split it into a suffix and prefix + -- (resulting in some elements excluded from the diff in the middle). doSuffix (Both xs ys : more) = Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context) + -- NOTE: both 'mempty's here are unreachable in practice because: + -- 1. The guard above ensures that @context@ is not 'Nothing' + -- 2. Both lists have the same length. : doPrefix (Both (maybe mempty (\n -> drop n xs) context) (maybe mempty (\n -> drop n ys) context) : more) + -- Diff elements are preserved. doSuffix (d : ds) = d : doSuffix ds -- | Pretty print a ContextDiff in the manner of diff -u. diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs index 5de98f5..4634e72 100644 --- a/src/Data/Algorithm/DiffOutput.hs +++ b/src/Data/Algorithm/DiffOutput.hs @@ -8,7 +8,7 @@ -- Portability : portable -- Author : Stephan Wehr (wehr@factisresearch.com) and JP Moresmau (jp@moresmau.fr) -- --- Generates a string output that is similar to diff normal mode +-- Generates a string output that is similar to diff normal mode. ----------------------------------------------------------------------------- module Data.Algorithm.DiffOutput where import Data.Algorithm.Diff @@ -16,34 +16,45 @@ import Text.PrettyPrint hiding ((<>)) import Data.Char import Data.List --- | Converts Diffs to DiffOperations +-- | Converts 'Diff's to 'DiffOperation's. diffToLineRanges :: [Diff [String]] -> [DiffOperation LineRange] diffToLineRanges = toLineRange 1 1 where toLineRange :: Int -> Int -> [Diff [String]] -> [DiffOperation LineRange] toLineRange _ _ []=[] + -- If the lines are the same, we just move forward. toLineRange leftLine rightLine (Both ls _:rs)= let lins=length ls in toLineRange (leftLine+lins) (rightLine+lins) rs + -- A 'Change' is introduced when an addition is followed by a deletion, or vice versa. toLineRange leftLine rightLine (Second lsS:First lsF:rs)= toChange leftLine rightLine lsF lsS rs toLineRange leftLine rightLine (First lsF:Second lsS:rs)= toChange leftLine rightLine lsF lsS rs + -- Introduce 'Addition's. toLineRange leftLine rightLine (Second lsS:rs)= let linesS=length lsS diff=Addition (LineRange (rightLine,rightLine+linesS-1) lsS) (leftLine-1) in diff : toLineRange leftLine (rightLine+linesS) rs + -- Introduce 'Deletion's. toLineRange leftLine rightLine (First lsF:rs)= let linesF=length lsF diff=Deletion (LineRange (leftLine,leftLine+linesF-1) lsF) (rightLine-1) in diff: toLineRange(leftLine+linesF) rightLine rs + -- | Build 'Change's from adjacent additions and deletions. + toChange :: Int -- ^ Current left line number. + -> Int -- ^ Current right line number. + -> [String] -- ^ Lines from the 'First' list (corresponding to deletions). + -> [String] -- ^ Lines from the 'Second' list (corresponding to additions). + -> [Diff [String]] -- ^ Remaining 'Diff's. + -> [DiffOperation LineRange] toChange leftLine rightLine lsF lsS rs= let linesS=length lsS linesF=length lsF in Change (LineRange (leftLine,leftLine+linesF-1) lsF) (LineRange (rightLine,rightLine+linesS-1) lsS) : toLineRange (leftLine+linesF) (rightLine+linesS) rs --- | pretty print the differences. The output is similar to the output of the diff utility +-- | Pretty print the differences. The output is similar to the output of the @diff@ utility. -- -- > > putStr (ppDiff (getGroupedDiff ["a","b","c","d","e"] ["a","c","d","f"])) -- > 2d1 @@ -59,7 +70,7 @@ ppDiff gdiff = render (prettyDiffs diffLineRanges) ++ "\n" --- | pretty print of diff operations +-- | Pretty print of diff operations. prettyDiffs :: [DiffOperation LineRange] -> Doc prettyDiffs [] = empty prettyDiffs (d : rest) = prettyDiff d $$ prettyDiffs rest @@ -80,58 +91,86 @@ prettyDiffs (d : rest) = prettyDiff d $$ prettyDiffs rest prettyLines start lins = vcat (map (\l -> char start <+> text l) lins) --- | Parse pretty printed Diffs as DiffOperations +-- | Parse pretty printed 'Diff's as 'DiffOperation's. parsePrettyDiffs :: String -> [DiffOperation LineRange] parsePrettyDiffs = reverse . doParse [] . lines where - doParse diffs [] = diffs - doParse diffs s = + -- | Parsing entry point that iteratively accumulates 'DiffOperation's + -- until the input is exhausted. + doParse :: [DiffOperation LineRange] -> [String] -> [DiffOperation LineRange] + -- NOTE: Incorrectly formatted lines are ignored. + doParse acc [] = acc + doParse acc s = let (mnd,r) = parseDiff s in case mnd of - Just nd -> doParse (nd:diffs) r - _ -> doParse diffs r + Just nd -> doParse (nd:acc) r + _ -> doParse acc r + + parseDiff :: [String] -> (Maybe (DiffOperation LineRange), [String]) parseDiff [] = (Nothing,[]) parseDiff (h:rs) = let (r1,hrs1) = parseRange h in case hrs1 of + -- In each case, we pass the left line range, + -- the remaining string after the type character, + -- which must contain the right line range, + -- and the remaining lines to parse. ('d':hrs2) -> parseDel r1 hrs2 rs ('a':hrs2) -> parseAdd r1 hrs2 rs ('c':hrs2) -> parseChange r1 hrs2 rs _ -> (Nothing,rs) + + parseDel :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String]) parseDel r1 hrs2 rs = let + -- NOTE: the wildcard should correspond to the end of line, + -- but is ignored for simplicity. (r2,_) = parseRange hrs2 (ls,rs2) = span (isPrefixOf "<") rs in (Just $ Deletion (LineRange r1 (map (drop 2) ls)) (fst r2), rs2) + + parseAdd :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String]) parseAdd r1 hrs2 rs = let + -- NOTE: the wildcard should correspond to the end of line, + -- but is ignored for simplicity. (r2,_) = parseRange hrs2 (ls,rs2) = span (isPrefixOf ">") rs in (Just $ Addition (LineRange r2 (map (drop 2) ls)) (fst r1), rs2) + + parseChange :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String]) parseChange r1 hrs2 rs = let + -- NOTE: the wildcard should correspond to the end of line, + -- but is ignored for simplicity. (r2,_) = parseRange hrs2 (ls1,rs2) = span (isPrefixOf "<") rs in case rs2 of + -- The left and right diff of a 'Change' are separated by a "---" line. ("---":rs3) -> let (ls2,rs4) = span (isPrefixOf ">") rs3 in (Just $ Change (LineRange r1 (map (drop 2) ls1)) (LineRange r2 (map (drop 2) ls2)), rs4) _ -> (Nothing,rs2) + parseRange :: String -> ((LineNo, LineNo),String) parseRange l = let (fstLine,rs) = span isDigit l (sndLine,rs3) = case rs of + -- The comma is used to separate + -- the start and end line numbers in a range, + -- but is omitted if they are the same. + -- i.e. the range is a single line. (',':rs2) -> span isDigit rs2 _ -> (fstLine,rs) in ((read fstLine,read sndLine),rs3) --- | Line number alias +-- | Line number alias. type LineNo = Int --- | Line Range: start, end and contents +-- | Line Range: start, end and contents. data LineRange = LineRange { lrNumbers :: (LineNo, LineNo) , lrContents :: [String] } - deriving (Show,Read,Eq,Ord) + deriving (Show, Read, Eq, Ord) --- | Diff Operation representing changes to apply +-- | Diff operation representing changes to apply. data DiffOperation a = Deletion a LineNo | Addition a LineNo | Change a a From ecc15f18105cd624f137c5475cc380d3a02d0280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Wed, 22 Apr 2026 15:41:23 -0600 Subject: [PATCH 2/9] Change `context` to `contextSize` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Facundo Domínguez --- src/Data/Algorithm/DiffContext.hs | 33 +++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs index f1336fc..7e849d7 100644 --- a/src/Data/Algorithm/DiffContext.hs +++ b/src/Data/Algorithm/DiffContext.hs @@ -87,12 +87,12 @@ unnumber (Numbered _ a) = a -- > -k getContextDiff :: Eq a - => Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'. + => Maybe Int -- ^ Context size. 'Nothing' means returning a whole-diff 'Hunk'. -> [a] -> [a] -> ContextDiff (Numbered a) -getContextDiff context a b = - getContextDiffNumbered context (numbered a) (numbered b) +getContextDiff contextSize a b = + getContextDiffNumbered contextSize (numbered a) (numbered b) -- | If for some reason you need the line numbers stripped from the -- result of 'getContextDiff' for backwards compatibility. @@ -101,16 +101,19 @@ unNumberContextDiff = fmap (fmap (bimap (fmap unnumber) (fmap unnumber))) -- | Create a diff made of separate 'Hunk's by reducing the lists of common -- elements surrounding each sequence of differing elements to the specified --- @context@ number. Adjancent hunks end up merged if the list of common elements --- between them is shorter than twice the @context@. --- If @context@ is 'Nothing', we get a single hunk with the whole diff. +-- @contextSize@. +-- +-- The context size determines when to merge adjacent hunks: +-- two hunks are merged when the number of common elements between them does not +-- exceed twice the context size. Furthermore, if @contextSize@ is 'Nothing' +-- a single hunk with the whole diff is produced. getContextDiffNumbered :: Eq a - => Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'. + => Maybe Int -- ^ Context size. 'Nothing' means returning a whole-diff 'Hunk'. -> [Numbered a] -> [Numbered a] -> ContextDiff (Numbered a) -getContextDiffNumbered context a0 b0 = +getContextDiffNumbered contextSize a0 b0 = -- The 'Diff' list is grouped into 'Hunks' that begin and end -- with matching ('Both') text, having non-matching ('First' and 'Second') -- text in the middle. Note that a non-trivial partition can only happen after @@ -127,27 +130,27 @@ getContextDiffNumbered context a0 b0 = doPrefix [Both _ _] = [] -- Do the prefix proper. doPrefix (Both xs ys : more) = - Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) context) - (maybe ys (\n -> drop (max 0 (length ys - n)) ys) context) : doSuffix more + Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) contextSize) + (maybe ys (\n -> drop (max 0 (length ys - n)) ys) contextSize) : doSuffix more -- Prefix finished, do the diff then the following suffix. doPrefix (d : ds) = doSuffix (d : ds) -- | Handle the common text following a diff. doSuffix :: Hunk a -> Hunk a doSuffix [] = [] -- A trailing suffix. - doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)] - -- Infinite context or common text too short to split. + doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize)] + -- Either whole context or common text is too short to split. doSuffix (Both xs ys : more) - | maybe True (\n -> length xs <= n * 2) context = + | maybe True (\n -> length xs <= n * 2) contextSize = Both xs ys : doPrefix more -- If the common text long enough, split it into a suffix and prefix -- (resulting in some elements excluded from the diff in the middle). doSuffix (Both xs ys : more) = - Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context) + Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize) -- NOTE: both 'mempty's here are unreachable in practice because: -- 1. The guard above ensures that @context@ is not 'Nothing' -- 2. Both lists have the same length. - : doPrefix (Both (maybe mempty (\n -> drop n xs) context) (maybe mempty (\n -> drop n ys) context) : more) + : doPrefix (Both (maybe mempty (\n -> drop n xs) contextSize) (maybe mempty (\n -> drop n ys) contextSize) : more) -- Diff elements are preserved. doSuffix (d : ds) = d : doSuffix ds From c0b380d6e2afbbe7574ebe6315f99f71ec321afd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Wed, 22 Apr 2026 15:41:23 -0600 Subject: [PATCH 3/9] Extend `diffToLineRanges` haddock with behavior details MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Facundo Domínguez --- src/Data/Algorithm/DiffOutput.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs index 4634e72..4c71316 100644 --- a/src/Data/Algorithm/DiffOutput.hs +++ b/src/Data/Algorithm/DiffOutput.hs @@ -16,7 +16,9 @@ import Text.PrettyPrint hiding ((<>)) import Data.Char import Data.List --- | Converts 'Diff's to 'DiffOperation's. +-- | Converts 'Diff's to 'DiffOperation's. 'First' and 'Second' +-- ocurrances are converted to 'Addition' and 'Deletion', respectively, while +-- consecutive ocurrances of them are replaced by a 'Change'. diffToLineRanges :: [Diff [String]] -> [DiffOperation LineRange] diffToLineRanges = toLineRange 1 1 where From 75f3924aca25df7be908a85ce02ce59bfd20d5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Wed, 22 Apr 2026 16:05:59 -0600 Subject: [PATCH 4/9] Add `toLineRange` local function haddock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Facundo Domínguez --- src/Data/Algorithm/DiffOutput.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs index 4c71316..44ca0f1 100644 --- a/src/Data/Algorithm/DiffOutput.hs +++ b/src/Data/Algorithm/DiffOutput.hs @@ -22,6 +22,9 @@ import Data.List diffToLineRanges :: [Diff [String]] -> [DiffOperation LineRange] diffToLineRanges = toLineRange 1 1 where + -- | In @toLineRange x y ds@, @x@ is the index of the current string in the + -- left input of the diff @ds@, and @y@ is the index of the corresponding + -- string in the right input of the diff @ds@. toLineRange :: Int -> Int -> [Diff [String]] -> [DiffOperation LineRange] toLineRange _ _ []=[] -- If the lines are the same, we just move forward. From cf8e33124548046eb0f1db493000eb7facdb3a6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Wed, 22 Apr 2026 16:22:55 -0600 Subject: [PATCH 5/9] Add `DiffOperation` constructor haddocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Facundo Domínguez --- src/Data/Algorithm/DiffOutput.hs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs index 44ca0f1..7a2a122 100644 --- a/src/Data/Algorithm/DiffOutput.hs +++ b/src/Data/Algorithm/DiffOutput.hs @@ -176,7 +176,10 @@ data LineRange = LineRange { lrNumbers :: (LineNo, LineNo) deriving (Show, Read, Eq, Ord) -- | Diff operation representing changes to apply. -data DiffOperation a = Deletion a LineNo - | Addition a LineNo - | Change a a - deriving (Show,Read,Eq,Ord) +data DiffOperation a + = Deletion a LineNo -- ^ Element deleted on the left input, line number + -- preceding the deleted lines in the right input. + | Addition a LineNo -- ^ Element added from the right input, line number + -- preceding the added lines in the left input. + | Change a a -- ^ Element changed from the left input to the right input. + deriving (Show,Read,Eq,Ord) From aee6f8a3ec7ed7d16aedf0b99be9e0f37a83c9fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Wed, 22 Apr 2026 16:22:55 -0600 Subject: [PATCH 6/9] Add more invariants to the documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Facundo Domínguez --- src/Data/Algorithm/DiffOutput.hs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs index 7a2a122..10e1a07 100644 --- a/src/Data/Algorithm/DiffOutput.hs +++ b/src/Data/Algorithm/DiffOutput.hs @@ -166,10 +166,17 @@ parsePrettyDiffs = reverse . doParse [] . lines _ -> (fstLine,rs) in ((read fstLine,read sndLine),rs3) --- | Line number alias. +-- | Line number alias. Always non-negative. type LineNo = Int -- | Line Range: start, end and contents. +-- +-- The following invariants hold: +-- +-- > snd lrNumbers >= fst lrNumbers +-- > snd lrNumbers - fst lrNumbers + 1 == length lrContents +-- +-- which imply @lrContents@ cannot be empty. data LineRange = LineRange { lrNumbers :: (LineNo, LineNo) , lrContents :: [String] } From dc64b8b9d6b56307c0f190590f1d520bcaa27fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Thu, 23 Apr 2026 09:53:33 -0600 Subject: [PATCH 7/9] Improve unreacheable code path comment --- src/Data/Algorithm/DiffContext.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs index 7e849d7..b768e60 100644 --- a/src/Data/Algorithm/DiffContext.hs +++ b/src/Data/Algorithm/DiffContext.hs @@ -146,10 +146,9 @@ getContextDiffNumbered contextSize a0 b0 = -- If the common text long enough, split it into a suffix and prefix -- (resulting in some elements excluded from the diff in the middle). doSuffix (Both xs ys : more) = + -- NOTE: the guard above ensures that the following 'maybe's + -- default values are unreachable and result in non-empty lists. Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize) - -- NOTE: both 'mempty's here are unreachable in practice because: - -- 1. The guard above ensures that @context@ is not 'Nothing' - -- 2. Both lists have the same length. : doPrefix (Both (maybe mempty (\n -> drop n xs) contextSize) (maybe mempty (\n -> drop n ys) contextSize) : more) -- Diff elements are preserved. doSuffix (d : ds) = d : doSuffix ds From 24c5baac5511ef5828ddb1bbc97ccb09a35890fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Fri, 24 Apr 2026 13:54:38 -0600 Subject: [PATCH 8/9] Add more invariant documentation Co-authored-by: Facundo Dominguez --- src/Data/Algorithm/Diff.hs | 3 +++ src/Data/Algorithm/DiffContext.hs | 23 ++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/Data/Algorithm/Diff.hs b/src/Data/Algorithm/Diff.hs index 42024c9..4036fc2 100644 --- a/src/Data/Algorithm/Diff.hs +++ b/src/Data/Algorithm/Diff.hs @@ -305,6 +305,9 @@ getDiffBy eq a b = markup a b . reverse $ ses eq a b markup _ _ _ = [] -- | Like 'getGroupedDiff' but accepts a custom equality predicate. +-- +-- Postcondition: the output list is guaranteed to be /chunked/. i.e. no two adjacent +-- elements share the same constructor. getGroupedDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff [a] [b]] getGroupedDiffBy eq a b = go $ getDiffBy eq a b where go (First x : xs) = let (fs, rest) = goFirsts xs in First (x:fs) : go rest diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs index b768e60..29bea68 100644 --- a/src/Data/Algorithm/DiffContext.hs +++ b/src/Data/Algorithm/DiffContext.hs @@ -29,6 +29,10 @@ import Text.PrettyPrint (Doc, text, empty, hcat) type ContextDiff c = [Hunk c] -- | A 'Hunk' is a list of adjacent 'Diff's. +-- +-- No two consecutive elements in a 'Hunk' are both applications +-- of 'First', 'Second', or 'Both', i.e. the list does not stutter +-- on 'Diff' constructors. type Hunk c = [Diff [c]] @@ -99,9 +103,8 @@ getContextDiff contextSize a b = unNumberContextDiff :: ContextDiff (Numbered a) -> ContextDiff a unNumberContextDiff = fmap (fmap (bimap (fmap unnumber) (fmap unnumber))) --- | Create a diff made of separate 'Hunk's by reducing the lists of common --- elements surrounding each sequence of differing elements to the specified --- @contextSize@. +-- | Create a diff of separate 'Hunk's, each containing a sequence +-- of differing elements surrounded by common elements for context. -- -- The context size determines when to merge adjacent hunks: -- two hunks are merged when the number of common elements between them does not @@ -124,9 +127,20 @@ getContextDiffNumbered contextSize a0 b0 = isBoth (Both _ _) = True isBoth _ = False -- | Handle the common text leading up to a diff. + -- + -- The @a@ elements in @doPrefix h@ are a subset of those in @h@, + -- in the same order. Additionaly, 'First' and 'Second' diffs + -- are identical in both lists. + -- + -- The difference between input and output is that some 'Both' diffs might + -- be split into two other 'Both' diffs. This hapṕens when their contents + -- are too large compared with the contex size, resulting in some @a@ + -- elements being dropped. doPrefix :: Hunk a -> Hunk a doPrefix [] = [] -- Trailing common elements are no prefix. + -- This case corresponds to when both input lists are identical, so the + -- resulting 'ContextDiff' is empty. doPrefix [Both _ _] = [] -- Do the prefix proper. doPrefix (Both xs ys : more) = @@ -135,6 +149,9 @@ getContextDiffNumbered contextSize a0 b0 = -- Prefix finished, do the diff then the following suffix. doPrefix (d : ds) = doSuffix (d : ds) -- | Handle the common text following a diff. + -- + -- Precondition: The input does not start with a 'Both' diff. Otherwise, + -- it behaves like @doPrefix@. doSuffix :: Hunk a -> Hunk a doSuffix [] = [] -- A trailing suffix. From 8cdc2b51b411e98ccfa7ba1349f486be4a8440ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= Date: Thu, 30 Apr 2026 14:43:55 -0600 Subject: [PATCH 9/9] Further refinements --- src/Data/Algorithm/DiffContext.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs index 29bea68..0f8eb47 100644 --- a/src/Data/Algorithm/DiffContext.hs +++ b/src/Data/Algorithm/DiffContext.hs @@ -128,12 +128,12 @@ getContextDiffNumbered contextSize a0 b0 = isBoth _ = False -- | Handle the common text leading up to a diff. -- - -- The @a@ elements in @doPrefix h@ are a subset of those in @h@, + -- Postcondition: The @a@ elements in @doPrefix h@ are a subset of those in @h@, -- in the same order. Additionaly, 'First' and 'Second' diffs -- are identical in both lists. -- -- The difference between input and output is that some 'Both' diffs might - -- be split into two other 'Both' diffs. This hapṕens when their contents + -- be split into two other 'Both' diffs. This happens when their contents -- are too large compared with the contex size, resulting in some @a@ -- elements being dropped. doPrefix :: Hunk a -> Hunk a @@ -142,7 +142,7 @@ getContextDiffNumbered contextSize a0 b0 = -- This case corresponds to when both input lists are identical, so the -- resulting 'ContextDiff' is empty. doPrefix [Both _ _] = [] - -- Do the prefix proper. + -- Do the prefix and then make the suffix. doPrefix (Both xs ys : more) = Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) contextSize) (maybe ys (\n -> drop (max 0 (length ys - n)) ys) contextSize) : doSuffix more @@ -160,14 +160,17 @@ getContextDiffNumbered contextSize a0 b0 = doSuffix (Both xs ys : more) | maybe True (\n -> length xs <= n * 2) contextSize = Both xs ys : doPrefix more - -- If the common text long enough, split it into a suffix and prefix + -- If the common text is too short compared with the context, + -- we preserve it and continue. As the following element cannot be a 'Both' + -- as well, this effectively places the common text in the inner part of the diff. + -- Otherwise, we split it into a suffix and prefix -- (resulting in some elements excluded from the diff in the middle). doSuffix (Both xs ys : more) = -- NOTE: the guard above ensures that the following 'maybe's -- default values are unreachable and result in non-empty lists. Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize) : doPrefix (Both (maybe mempty (\n -> drop n xs) contextSize) (maybe mempty (\n -> drop n ys) contextSize) : more) - -- Diff elements are preserved. + -- 'First' and 'Second' elements are no suffix, preserve them and continue looking. doSuffix (d : ds) = d : doSuffix ds -- | Pretty print a ContextDiff in the manner of diff -u.