From 344e6f0f99389af3c4b63239ef09d98b9e1a9c99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Wed, 15 Apr 2026 15:35:42 -0600
Subject: [PATCH 1/9] Add and fix haddocks in `DiffContext` and `DiffOutput`

---
 src/Data/Algorithm/DiffContext.hs | 52 ++++++++++++++++++-------
 src/Data/Algorithm/DiffOutput.hs  | 65 ++++++++++++++++++++++++-------
 2 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs
index ac6761c..f1336fc 100644
--- a/src/Data/Algorithm/DiffContext.hs
+++ b/src/Data/Algorithm/DiffContext.hs
@@ -8,7 +8,7 @@
 -- Portability :  portable
 -- Author      :  David Fox (ddssff at the email service from google)
 --
--- Generates a grouped diff with merged runs, and outputs them in the manner of diff -u
+-- Generates a grouped diff with merged runs, and outputs them in the manner of @diff -u@.
 -----------------------------------------------------------------------------
 module Data.Algorithm.DiffContext
     ( ContextDiff, Hunk
@@ -22,19 +22,23 @@ module Data.Algorithm.DiffContext
     ) where
 
 import Data.Algorithm.Diff (PolyDiff(..), Diff, getGroupedDiff)
--- import Data.List (groupBy)
 import Data.Bifunctor
 import Text.PrettyPrint (Doc, text, empty, hcat)
 
+-- | A diff consisting of disjoint 'Hunk's.
 type ContextDiff c = [Hunk c]
+
+-- | A 'Hunk' is a list of adjacent 'Diff's.
 type Hunk c = [Diff [c]]
 
--- | A version of 'groupBy' that does not assume the argument function
--- is transitive.  This is used to partition the 'Diff' list into
--- segments that begin and end with matching ('Both') text, with and
--- have non-matching ('First' and 'Second') text in the middle.
+
+-- | Groups elements so that consecutive elements in a group satisfy the predicate.
+-- This is unlike 'Data.List.groupBy' where grouped elements are only guaranteed to
+-- satisfy the predicate w.r.t. the first element of the group.
+--
+-- For instance, to split the input where there are two consecutive `1`s:
 --
---     > let notBoth1 a b = not (a == 1 || b == 1) in
+--     > let notBoth1 a b = not (a == 1 && b == 1) in
 --     >
 --     > groupBy' notBoth1 [1,1,2,3,1,1,4,5,6,1]
 --     > [[1],[1,2,3,1],[1,4,5,6,1]]
@@ -83,7 +87,7 @@ unnumber (Numbered _ a) = a
 -- > -k
 getContextDiff ::
   Eq a
-  => Maybe Int -- ^ Number of context elements, Nothing means infinite
+  => Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'.
   -> [a]
   -> [a]
   -> ContextDiff (Numbered a)
@@ -91,38 +95,60 @@ getContextDiff context a b =
   getContextDiffNumbered context (numbered a) (numbered b)
 
 -- | If for some reason you need the line numbers stripped from the
--- result of getContextDiff for backwards compatibility.
+-- result of 'getContextDiff' for backwards compatibility.
 unNumberContextDiff :: ContextDiff (Numbered a) -> ContextDiff a
 unNumberContextDiff = fmap (fmap (bimap (fmap unnumber) (fmap unnumber)))
 
+-- | Create a diff made of separate 'Hunk's by reducing the lists of common
+-- elements surrounding each sequence of differing elements to the specified
+-- @context@ number. Adjancent hunks end up merged if the list of common elements
+-- between them is shorter than twice the @context@.
+-- If @context@ is 'Nothing', we get a single hunk with the whole diff.
 getContextDiffNumbered ::
   Eq a
-  => Maybe Int -- ^ Number of context elements, Nothing means infinite
+  => Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'.
   -> [Numbered a]
   -> [Numbered a]
   -> ContextDiff (Numbered a)
 getContextDiffNumbered context a0 b0 =
+    -- The 'Diff' list is grouped into 'Hunks' that begin and end
+    -- with matching ('Both') text, having non-matching ('First' and 'Second')
+    -- text in the middle. Note that a non-trivial partition can only happen after
+    -- the matching text has been reduced to become consecutive 'Both' values
+    -- corresponding to a hunk's suffix and the following hunk prefix.
     groupBy' (\a b -> not (isBoth a && isBoth b)) $ doPrefix $ getGroupedDiff a0 b0
     where
       isBoth (Both _ _) = True
       isBoth _ = False
-      -- Handle the common text leading up to a diff.
+      -- | Handle the common text leading up to a diff.
+      doPrefix :: Hunk a -> Hunk a
       doPrefix [] = []
+      -- Trailing common elements are no prefix.
       doPrefix [Both _ _] = []
+      -- Do the prefix proper.
       doPrefix (Both xs ys : more) =
           Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) context)
                (maybe ys (\n -> drop (max 0 (length ys - n)) ys) context) : doSuffix more
-      -- Prefix finished, do the diff then the following suffix
+      -- Prefix finished, do the diff then the following suffix.
       doPrefix (d : ds) = doSuffix (d : ds)
-      -- Handle the common text following a diff.
+      -- | Handle the common text following a diff.
+      doSuffix :: Hunk a -> Hunk a
       doSuffix [] = []
+      -- A trailing suffix.
       doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)]
+      -- Infinite context or common text too short to split.
       doSuffix (Both xs ys : more)
           | maybe True (\n -> length xs <= n * 2) context =
               Both xs ys : doPrefix more
+      -- If the common text long enough, split it into a suffix and prefix
+      -- (resulting in some elements excluded from the diff in the middle).
       doSuffix (Both xs ys : more) =
           Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)
+                   -- NOTE: both 'mempty's here are unreachable in practice because:
+                   -- 1. The guard above ensures that @context@ is not 'Nothing'
+                   -- 2. Both lists have the same length.
                    : doPrefix (Both (maybe mempty (\n -> drop n xs) context) (maybe mempty (\n -> drop n ys) context) : more)
+      -- Diff elements are preserved.
       doSuffix (d : ds) = d : doSuffix ds
 
 -- | Pretty print a ContextDiff in the manner of diff -u.
diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs
index 5de98f5..4634e72 100644
--- a/src/Data/Algorithm/DiffOutput.hs
+++ b/src/Data/Algorithm/DiffOutput.hs
@@ -8,7 +8,7 @@
 -- Portability :  portable
 -- Author      :  Stephan Wehr (wehr@factisresearch.com) and JP Moresmau (jp@moresmau.fr)
 --
--- Generates a string output that is similar to diff normal mode
+-- Generates a string output that is similar to diff normal mode.
 -----------------------------------------------------------------------------
 module Data.Algorithm.DiffOutput where
 import Data.Algorithm.Diff
@@ -16,34 +16,45 @@ import Text.PrettyPrint hiding ((<>))
 import Data.Char
 import Data.List
 
--- | Converts Diffs to DiffOperations
+-- | Converts 'Diff's to 'DiffOperation's.
 diffToLineRanges :: [Diff [String]] -> [DiffOperation LineRange]
 diffToLineRanges = toLineRange 1 1
    where
           toLineRange :: Int -> Int -> [Diff [String]] -> [DiffOperation LineRange]
           toLineRange _ _ []=[]
+          -- If the lines are the same, we just move forward.
           toLineRange leftLine rightLine (Both ls _:rs)=
                 let lins=length ls
                 in  toLineRange (leftLine+lins) (rightLine+lins) rs
+          -- A 'Change' is introduced when an addition is followed by a deletion, or vice versa.
           toLineRange leftLine rightLine (Second lsS:First lsF:rs)=
                 toChange leftLine rightLine lsF lsS rs
           toLineRange leftLine rightLine (First lsF:Second lsS:rs)=
                 toChange leftLine rightLine lsF lsS rs
+          -- Introduce 'Addition's.
           toLineRange leftLine rightLine (Second lsS:rs)=
                 let linesS=length lsS
                     diff=Addition (LineRange (rightLine,rightLine+linesS-1) lsS) (leftLine-1)
                 in  diff : toLineRange leftLine (rightLine+linesS) rs
+          -- Introduce 'Deletion's.
           toLineRange leftLine rightLine  (First lsF:rs)=
                 let linesF=length lsF
                     diff=Deletion (LineRange (leftLine,leftLine+linesF-1) lsF) (rightLine-1)
                 in  diff: toLineRange(leftLine+linesF) rightLine rs
+          -- | Build 'Change's from adjacent additions and deletions.
+          toChange :: Int -- ^ Current left line number.
+                   -> Int -- ^ Current right line number.
+                   -> [String] -- ^ Lines from the 'First' list (corresponding to deletions).
+                   -> [String] -- ^ Lines from the 'Second' list (corresponding to additions).
+                   -> [Diff [String]] -- ^ Remaining 'Diff's.
+                   -> [DiffOperation LineRange]
           toChange leftLine rightLine lsF lsS rs=
                 let linesS=length lsS
                     linesF=length lsF
                 in  Change (LineRange (leftLine,leftLine+linesF-1) lsF) (LineRange (rightLine,rightLine+linesS-1) lsS)
                         : toLineRange (leftLine+linesF) (rightLine+linesS) rs
 
--- | pretty print the differences. The output is similar to the output of the diff utility
+-- | Pretty print the differences. The output is similar to the output of the @diff@ utility.
 --
 -- > > putStr (ppDiff (getGroupedDiff ["a","b","c","d","e"] ["a","c","d","f"]))
 -- > 2d1
@@ -59,7 +70,7 @@ ppDiff gdiff =
         render (prettyDiffs diffLineRanges) ++ "\n"
 
 
--- | pretty print of diff operations
+-- | Pretty print of diff operations.
 prettyDiffs :: [DiffOperation LineRange] -> Doc
 prettyDiffs [] = empty
 prettyDiffs (d : rest) = prettyDiff d $$ prettyDiffs rest
@@ -80,58 +91,86 @@ prettyDiffs (d : rest) = prettyDiff d $$ prettyDiffs rest
       prettyLines start lins =
           vcat (map (\l -> char start <+> text l) lins)
 
--- | Parse pretty printed Diffs as DiffOperations
+-- | Parse pretty printed 'Diff's as 'DiffOperation's.
 parsePrettyDiffs :: String -> [DiffOperation LineRange]
 parsePrettyDiffs = reverse . doParse [] . lines
   where
-    doParse diffs [] = diffs
-    doParse diffs s =
+    -- | Parsing entry point that iteratively accumulates 'DiffOperation's
+    -- until the input is exhausted.
+    doParse :: [DiffOperation LineRange] -> [String] -> [DiffOperation LineRange]
+    -- NOTE: Incorrectly formatted lines are ignored.
+    doParse acc [] = acc
+    doParse acc s =
         let (mnd,r) = parseDiff s
         in case mnd of
-            Just nd -> doParse (nd:diffs) r
-            _          -> doParse diffs r
+            Just nd -> doParse (nd:acc) r
+            _          -> doParse acc r
+
+    parseDiff :: [String] -> (Maybe (DiffOperation LineRange), [String])
     parseDiff [] = (Nothing,[])
     parseDiff (h:rs) = let
         (r1,hrs1) = parseRange h
         in case hrs1 of
+                -- In each case, we pass the left line range,
+                -- the remaining string after the type character,
+                -- which must contain the right line range,
+                -- and the remaining lines to parse.
                 ('d':hrs2) -> parseDel r1 hrs2 rs
                 ('a':hrs2) -> parseAdd r1 hrs2 rs
                 ('c':hrs2) -> parseChange r1 hrs2 rs
                 _ -> (Nothing,rs)
+
+    parseDel :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
     parseDel r1 hrs2 rs = let
+        -- NOTE: the wildcard should correspond to the end of line,
+        -- but is ignored for simplicity.
         (r2,_) = parseRange hrs2
         (ls,rs2) = span (isPrefixOf "<") rs
         in (Just $ Deletion (LineRange r1 (map (drop 2) ls)) (fst r2), rs2)
+
+    parseAdd :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
     parseAdd r1 hrs2 rs = let
+        -- NOTE: the wildcard should correspond to the end of line,
+        -- but is ignored for simplicity.
         (r2,_) = parseRange hrs2
         (ls,rs2) = span (isPrefixOf ">") rs
         in (Just $ Addition (LineRange r2 (map (drop 2) ls)) (fst r1), rs2)
+
+    parseChange :: (LineNo, LineNo) -> String -> [String] -> (Maybe (DiffOperation LineRange), [String])
     parseChange r1 hrs2 rs = let
+        -- NOTE: the wildcard should correspond to the end of line,
+        -- but is ignored for simplicity.
         (r2,_) = parseRange hrs2
         (ls1,rs2) = span (isPrefixOf "<") rs
         in case rs2 of
+            -- The left and right diff of a 'Change' are separated by a "---" line.
             ("---":rs3) -> let
                 (ls2,rs4) = span (isPrefixOf ">") rs3
                 in (Just $ Change (LineRange r1 (map (drop 2) ls1)) (LineRange r2 (map (drop 2) ls2)), rs4)
             _ -> (Nothing,rs2)
+
     parseRange :: String -> ((LineNo, LineNo),String)
     parseRange l = let
         (fstLine,rs) = span isDigit l
         (sndLine,rs3) = case rs of
+                                    -- The comma is used to separate
+                                    -- the start and end line numbers in a range,
+                                    -- but is omitted if they are the same.
+                                    -- i.e. the range is a single line.
                                     (',':rs2) -> span isDigit rs2
                                     _ -> (fstLine,rs)
         in ((read fstLine,read sndLine),rs3)
 
--- | Line number alias
+-- | Line number alias.
 type LineNo = Int
 
--- | Line Range: start, end and contents
+-- | Line Range: start, end and contents.
 data LineRange = LineRange { lrNumbers :: (LineNo, LineNo)
                            , lrContents :: [String]
                            }
-            deriving (Show,Read,Eq,Ord)
+            deriving (Show, Read, Eq, Ord)
 
--- | Diff Operation  representing changes to apply
+-- | Diff operation representing changes to apply.
 data DiffOperation a = Deletion a LineNo
             | Addition a LineNo
             | Change a a

From ecc15f18105cd624f137c5475cc380d3a02d0280 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Wed, 22 Apr 2026 15:41:23 -0600
Subject: [PATCH 2/9] Change `context` to `contextSize`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Facundo Domínguez <facundo.dominguez@tweag.io>
---
 src/Data/Algorithm/DiffContext.hs | 33 +++++++++++++++++--------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs
index f1336fc..7e849d7 100644
--- a/src/Data/Algorithm/DiffContext.hs
+++ b/src/Data/Algorithm/DiffContext.hs
@@ -87,12 +87,12 @@ unnumber (Numbered _ a) = a
 -- > -k
 getContextDiff ::
   Eq a
-  => Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'.
+  => Maybe Int -- ^ Context size. 'Nothing' means returning a whole-diff 'Hunk'.
   -> [a]
   -> [a]
   -> ContextDiff (Numbered a)
-getContextDiff context a b =
-  getContextDiffNumbered context (numbered a) (numbered b)
+getContextDiff contextSize a b =
+  getContextDiffNumbered contextSize (numbered a) (numbered b)
 
 -- | If for some reason you need the line numbers stripped from the
 -- result of 'getContextDiff' for backwards compatibility.
@@ -101,16 +101,19 @@ unNumberContextDiff = fmap (fmap (bimap (fmap unnumber) (fmap unnumber)))
 
 -- | Create a diff made of separate 'Hunk's by reducing the lists of common
 -- elements surrounding each sequence of differing elements to the specified
--- @context@ number. Adjancent hunks end up merged if the list of common elements
--- between them is shorter than twice the @context@.
--- If @context@ is 'Nothing', we get a single hunk with the whole diff.
+-- @contextSize@.
+--
+-- The context size determines when to merge adjacent hunks:
+-- two hunks are merged when the number of common elements between them does not
+-- exceed twice the context size. Furthermore, if @contextSize@ is 'Nothing'
+-- a single hunk with the whole diff is produced.
 getContextDiffNumbered ::
   Eq a
-  => Maybe Int -- ^ Number of context elements, 'Nothing' means returning a whole-diff 'Hunk'.
+  => Maybe Int -- ^ Context size. 'Nothing' means returning a whole-diff 'Hunk'.
   -> [Numbered a]
   -> [Numbered a]
   -> ContextDiff (Numbered a)
-getContextDiffNumbered context a0 b0 =
+getContextDiffNumbered contextSize a0 b0 =
     -- The 'Diff' list is grouped into 'Hunks' that begin and end
     -- with matching ('Both') text, having non-matching ('First' and 'Second')
     -- text in the middle. Note that a non-trivial partition can only happen after
@@ -127,27 +130,27 @@ getContextDiffNumbered context a0 b0 =
       doPrefix [Both _ _] = []
       -- Do the prefix proper.
       doPrefix (Both xs ys : more) =
-          Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) context)
-               (maybe ys (\n -> drop (max 0 (length ys - n)) ys) context) : doSuffix more
+          Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) contextSize)
+               (maybe ys (\n -> drop (max 0 (length ys - n)) ys) contextSize) : doSuffix more
       -- Prefix finished, do the diff then the following suffix.
       doPrefix (d : ds) = doSuffix (d : ds)
       -- | Handle the common text following a diff.
       doSuffix :: Hunk a -> Hunk a
       doSuffix [] = []
       -- A trailing suffix.
-      doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)]
-      -- Infinite context or common text too short to split.
+      doSuffix [Both xs ys] = [Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize)]
+      -- Either whole context or common text is too short to split.
       doSuffix (Both xs ys : more)
-          | maybe True (\n -> length xs <= n * 2) context =
+          | maybe True (\n -> length xs <= n * 2) contextSize =
               Both xs ys : doPrefix more
       -- If the common text long enough, split it into a suffix and prefix
       -- (resulting in some elements excluded from the diff in the middle).
       doSuffix (Both xs ys : more) =
-          Both (maybe xs (\n -> take n xs) context) (maybe ys (\n -> take n ys) context)
+          Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize)
                    -- NOTE: both 'mempty's here are unreachable in practice because:
                    -- 1. The guard above ensures that @context@ is not 'Nothing'
                    -- 2. Both lists have the same length.
-                   : doPrefix (Both (maybe mempty (\n -> drop n xs) context) (maybe mempty (\n -> drop n ys) context) : more)
+                   : doPrefix (Both (maybe mempty (\n -> drop n xs) contextSize) (maybe mempty (\n -> drop n ys) contextSize) : more)
       -- Diff elements are preserved.
       doSuffix (d : ds) = d : doSuffix ds
 

From c0b380d6e2afbbe7574ebe6315f99f71ec321afd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Wed, 22 Apr 2026 15:41:23 -0600
Subject: [PATCH 3/9] Extend `diffToLineRanges` haddock with behavior details
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Facundo Domínguez <facundo.dominguez@tweag.io>
---
 src/Data/Algorithm/DiffOutput.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs
index 4634e72..4c71316 100644
--- a/src/Data/Algorithm/DiffOutput.hs
+++ b/src/Data/Algorithm/DiffOutput.hs
@@ -16,7 +16,9 @@ import Text.PrettyPrint hiding ((<>))
 import Data.Char
 import Data.List
 
--- | Converts 'Diff's to 'DiffOperation's.
+-- | Converts 'Diff's to 'DiffOperation's. 'First' and 'Second'
+-- ocurrances are converted to 'Addition' and 'Deletion', respectively, while
+-- consecutive ocurrances of them are replaced by a 'Change'.
 diffToLineRanges :: [Diff [String]] -> [DiffOperation LineRange]
 diffToLineRanges = toLineRange 1 1
    where

From 75f3924aca25df7be908a85ce02ce59bfd20d5ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Wed, 22 Apr 2026 16:05:59 -0600
Subject: [PATCH 4/9] Add `toLineRange` local function haddock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Facundo Domínguez <facundo.dominguez@tweag.io>
---
 src/Data/Algorithm/DiffOutput.hs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs
index 4c71316..44ca0f1 100644
--- a/src/Data/Algorithm/DiffOutput.hs
+++ b/src/Data/Algorithm/DiffOutput.hs
@@ -22,6 +22,9 @@ import Data.List
 diffToLineRanges :: [Diff [String]] -> [DiffOperation LineRange]
 diffToLineRanges = toLineRange 1 1
    where
+          -- | In @toLineRange x y ds@, @x@ is the index of the current string in the
+          -- left input of the diff @ds@, and @y@ is the index of the corresponding
+          -- string in the right input of the diff @ds@.
           toLineRange :: Int -> Int -> [Diff [String]] -> [DiffOperation LineRange]
           toLineRange _ _ []=[]
           -- If the lines are the same, we just move forward.

From cf8e33124548046eb0f1db493000eb7facdb3a6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Wed, 22 Apr 2026 16:22:55 -0600
Subject: [PATCH 5/9] Add `DiffOperation` constructor haddocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Facundo Domínguez <facundo.dominguez@tweag.io>
---
 src/Data/Algorithm/DiffOutput.hs | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs
index 44ca0f1..7a2a122 100644
--- a/src/Data/Algorithm/DiffOutput.hs
+++ b/src/Data/Algorithm/DiffOutput.hs
@@ -176,7 +176,10 @@ data LineRange = LineRange { lrNumbers :: (LineNo, LineNo)
             deriving (Show, Read, Eq, Ord)
 
 -- | Diff operation representing changes to apply.
-data DiffOperation a = Deletion a LineNo
-            | Addition a LineNo
-            | Change a a
-            deriving (Show,Read,Eq,Ord)
+data DiffOperation a
+  = Deletion a LineNo  -- ^ Element deleted on the left input, line number
+                       -- preceding the deleted lines in the right input.
+  | Addition a LineNo  -- ^ Element added from the right input, line number
+                       -- preceding the added lines in the left input.
+  | Change a a         -- ^ Element changed from the left input to the right input.
+  deriving (Show,Read,Eq,Ord)

From aee6f8a3ec7ed7d16aedf0b99be9e0f37a83c9fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Wed, 22 Apr 2026 16:22:55 -0600
Subject: [PATCH 6/9] Add more invariants to the documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Facundo Domínguez <facundo.dominguez@tweag.io>
---
 src/Data/Algorithm/DiffOutput.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/Data/Algorithm/DiffOutput.hs b/src/Data/Algorithm/DiffOutput.hs
index 7a2a122..10e1a07 100644
--- a/src/Data/Algorithm/DiffOutput.hs
+++ b/src/Data/Algorithm/DiffOutput.hs
@@ -166,10 +166,17 @@ parsePrettyDiffs = reverse . doParse [] . lines
                                     _ -> (fstLine,rs)
         in ((read fstLine,read sndLine),rs3)
 
--- | Line number alias.
+-- | Line number alias. Always non-negative.
 type LineNo = Int
 
 -- | Line Range: start, end and contents.
+--
+-- The following invariants hold:
+--
+-- > snd lrNumbers >= fst lrNumbers
+-- > snd lrNumbers - fst lrNumbers + 1 == length lrContents
+--
+-- which imply @lrContents@ cannot be empty.
 data LineRange = LineRange { lrNumbers :: (LineNo, LineNo)
                            , lrContents :: [String]
                            }

From dc64b8b9d6b56307c0f190590f1d520bcaa27fe5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Thu, 23 Apr 2026 09:53:33 -0600
Subject: [PATCH 7/9] Improve unreacheable code path comment

---
 src/Data/Algorithm/DiffContext.hs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs
index 7e849d7..b768e60 100644
--- a/src/Data/Algorithm/DiffContext.hs
+++ b/src/Data/Algorithm/DiffContext.hs
@@ -146,10 +146,9 @@ getContextDiffNumbered contextSize a0 b0 =
       -- If the common text long enough, split it into a suffix and prefix
       -- (resulting in some elements excluded from the diff in the middle).
       doSuffix (Both xs ys : more) =
+          -- NOTE: the guard above ensures that the following 'maybe's
+          -- default values are unreachable and result in non-empty lists.
           Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize)
-                   -- NOTE: both 'mempty's here are unreachable in practice because:
-                   -- 1. The guard above ensures that @context@ is not 'Nothing'
-                   -- 2. Both lists have the same length.
                    : doPrefix (Both (maybe mempty (\n -> drop n xs) contextSize) (maybe mempty (\n -> drop n ys) contextSize) : more)
       -- Diff elements are preserved.
       doSuffix (d : ds) = d : doSuffix ds

From 24c5baac5511ef5828ddb1bbc97ccb09a35890fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Fri, 24 Apr 2026 13:54:38 -0600
Subject: [PATCH 8/9] Add more invariant documentation

Co-authored-by: Facundo Dominguez <facundo.dominguez@tweag.io>
---
 src/Data/Algorithm/Diff.hs        |  3 +++
 src/Data/Algorithm/DiffContext.hs | 23 ++++++++++++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/Data/Algorithm/Diff.hs b/src/Data/Algorithm/Diff.hs
index 42024c9..4036fc2 100644
--- a/src/Data/Algorithm/Diff.hs
+++ b/src/Data/Algorithm/Diff.hs
@@ -305,6 +305,9 @@ getDiffBy eq a b = markup a b . reverse $ ses eq a b
           markup _ _ _ = []
 
 -- | Like 'getGroupedDiff' but accepts a custom equality predicate.
+--
+-- Postcondition: the output list is guaranteed to be /chunked/. i.e. no two adjacent
+-- elements share the same constructor.
 getGroupedDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff [a] [b]]
 getGroupedDiffBy eq a b = go $ getDiffBy eq a b
     where go (First x  : xs) = let (fs, rest) = goFirsts  xs in First  (x:fs)     : go rest
diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs
index b768e60..29bea68 100644
--- a/src/Data/Algorithm/DiffContext.hs
+++ b/src/Data/Algorithm/DiffContext.hs
@@ -29,6 +29,10 @@ import Text.PrettyPrint (Doc, text, empty, hcat)
 type ContextDiff c = [Hunk c]
 
 -- | A 'Hunk' is a list of adjacent 'Diff's.
+--
+-- No two consecutive elements in a 'Hunk' are both applications
+-- of 'First', 'Second', or 'Both', i.e. the list does not stutter
+-- on 'Diff' constructors.
 type Hunk c = [Diff [c]]
 
 
@@ -99,9 +103,8 @@ getContextDiff contextSize a b =
 unNumberContextDiff :: ContextDiff (Numbered a) -> ContextDiff a
 unNumberContextDiff = fmap (fmap (bimap (fmap unnumber) (fmap unnumber)))
 
--- | Create a diff made of separate 'Hunk's by reducing the lists of common
--- elements surrounding each sequence of differing elements to the specified
--- @contextSize@.
+-- | Create a diff of separate 'Hunk's, each containing a sequence
+-- of differing elements surrounded by common elements for context.
 --
 -- The context size determines when to merge adjacent hunks:
 -- two hunks are merged when the number of common elements between them does not
@@ -124,9 +127,20 @@ getContextDiffNumbered contextSize a0 b0 =
       isBoth (Both _ _) = True
       isBoth _ = False
       -- | Handle the common text leading up to a diff.
+      --
+      -- The @a@ elements in @doPrefix h@ are a subset of those in @h@,
+      -- in the same order. Additionaly, 'First' and 'Second' diffs
+      -- are identical in both lists.
+      --
+      -- The difference between input and output is that some 'Both' diffs might
+      -- be split into two other 'Both' diffs. This hapṕens when their contents
+      -- are too large compared with the contex size, resulting in some @a@
+      -- elements being dropped.
       doPrefix :: Hunk a -> Hunk a
       doPrefix [] = []
       -- Trailing common elements are no prefix.
+      -- This case corresponds to when both input lists are identical, so the
+      -- resulting 'ContextDiff' is empty.
       doPrefix [Both _ _] = []
       -- Do the prefix proper.
       doPrefix (Both xs ys : more) =
@@ -135,6 +149,9 @@ getContextDiffNumbered contextSize a0 b0 =
       -- Prefix finished, do the diff then the following suffix.
       doPrefix (d : ds) = doSuffix (d : ds)
       -- | Handle the common text following a diff.
+      --
+      -- Precondition: The input does not start with a 'Both' diff. Otherwise,
+      -- it behaves like @doPrefix@.
       doSuffix :: Hunk a -> Hunk a
       doSuffix [] = []
       -- A trailing suffix.

From 8cdc2b51b411e98ccfa7ba1349f486be4a8440ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20G=C3=B3ngora?= <xavier.gongora@tweag.io>
Date: Thu, 30 Apr 2026 14:43:55 -0600
Subject: [PATCH 9/9] Further refinements

---
 src/Data/Algorithm/DiffContext.hs | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/Data/Algorithm/DiffContext.hs b/src/Data/Algorithm/DiffContext.hs
index 29bea68..0f8eb47 100644
--- a/src/Data/Algorithm/DiffContext.hs
+++ b/src/Data/Algorithm/DiffContext.hs
@@ -128,12 +128,12 @@ getContextDiffNumbered contextSize a0 b0 =
       isBoth _ = False
       -- | Handle the common text leading up to a diff.
       --
-      -- The @a@ elements in @doPrefix h@ are a subset of those in @h@,
+      -- Postcondition: The @a@ elements in @doPrefix h@ are a subset of those in @h@,
       -- in the same order. Additionaly, 'First' and 'Second' diffs
       -- are identical in both lists.
       --
       -- The difference between input and output is that some 'Both' diffs might
-      -- be split into two other 'Both' diffs. This hapṕens when their contents
+      -- be split into two other 'Both' diffs. This happens when their contents
       -- are too large compared with the contex size, resulting in some @a@
       -- elements being dropped.
       doPrefix :: Hunk a -> Hunk a
@@ -142,7 +142,7 @@ getContextDiffNumbered contextSize a0 b0 =
       -- This case corresponds to when both input lists are identical, so the
       -- resulting 'ContextDiff' is empty.
       doPrefix [Both _ _] = []
-      -- Do the prefix proper.
+      -- Do the prefix and then make the suffix.
       doPrefix (Both xs ys : more) =
           Both (maybe xs (\n -> drop (max 0 (length xs - n)) xs) contextSize)
                (maybe ys (\n -> drop (max 0 (length ys - n)) ys) contextSize) : doSuffix more
@@ -160,14 +160,17 @@ getContextDiffNumbered contextSize a0 b0 =
       doSuffix (Both xs ys : more)
           | maybe True (\n -> length xs <= n * 2) contextSize =
               Both xs ys : doPrefix more
-      -- If the common text long enough, split it into a suffix and prefix
+      -- If the common text is too short compared with the context,
+      -- we preserve it and continue. As the following element cannot be a 'Both'
+      -- as well, this effectively places the common text in the inner part of the diff.
+      -- Otherwise, we split it into a suffix and prefix
       -- (resulting in some elements excluded from the diff in the middle).
       doSuffix (Both xs ys : more) =
           -- NOTE: the guard above ensures that the following 'maybe's
           -- default values are unreachable and result in non-empty lists.
           Both (maybe xs (\n -> take n xs) contextSize) (maybe ys (\n -> take n ys) contextSize)
                    : doPrefix (Both (maybe mempty (\n -> drop n xs) contextSize) (maybe mempty (\n -> drop n ys) contextSize) : more)
-      -- Diff elements are preserved.
+      -- 'First' and 'Second' elements are no suffix, preserve them and continue looking.
       doSuffix (d : ds) = d : doSuffix ds
 
 -- | Pretty print a ContextDiff in the manner of diff -u.