From cc577831b636fcf6784eab5ede588b6cea6139d0 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Tue, 21 Apr 2026 19:28:45 -0700
Subject: [PATCH 01/31] Thompson

---
 src/Control/Lens/Grammar/BackusNaur.hs | 113 ++++++++++++++++++++++++-
 test/Properties/Kleene.hs              |  21 +++++
 2 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/src/Control/Lens/Grammar/BackusNaur.hs b/src/Control/Lens/Grammar/BackusNaur.hs
index bd47607e..5d61fa20 100644
--- a/src/Control/Lens/Grammar/BackusNaur.hs
+++ b/src/Control/Lens/Grammar/BackusNaur.hs
@@ -33,6 +33,10 @@ import Data.Bifunctor.Joker
 import Data.Coerce
 import Data.Foldable
 import Data.Function
+import qualified Data.IntMap.Strict as IntMap
+import Data.IntMap.Strict (IntMap)
+import qualified Data.IntSet as IntSet
+import Data.IntSet (IntSet)
 import Data.MemoTrie
 import qualified Data.Set as Set
 import Data.Set (Set)
@@ -171,6 +175,111 @@ rulesNamed nameX = foldl' (flip inserter) Set.empty where
   inserter (nameY,y) =
     if nameX == nameY then Set.insert y else id
 
+data ThompsonState token = ThompsonState (TokenClass token) IntSet
+
+thompsonFinalState :: Int
+thompsonFinalState = 0
+
+bypassStates :: Bool -> IntSet -> IntSet
+bypassStates True = id
+bypassStates False = const IntSet.empty
+
+containsNonTerminal :: RegEx token -> Bool
+containsNonTerminal = \
+  case
+    NonTerminal _ -> True
+    Sequence rex0 rex1 ->
+      containsNonTerminal rex0 || containsNonTerminal rex1
+    KleeneStar rex -> containsNonTerminal rex
+    KleeneOpt rex -> containsNonTerminal rex
+    KleenePlus rex -> containsNonTerminal rex
+    RegExam (Alternate rex0 rex1) ->
+      containsNonTerminal rex0 || containsNonTerminal rex1
+    _ -> False
+
+compileThompson
+  :: RegEx token
+  -> Int
+  -> IntSet
+  -> (IntSet, [(Int, ThompsonState token)], Int, Bool)
+compileThompson = go where
+  go rex nextId dests = case rex of
+    SeqEmpty -> (IntSet.empty, [], nextId, True)
+    NonTerminal _ -> (IntSet.empty, [], nextId, False)
+    Sequence rex0 rex1 ->
+      let
+        (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
+        (firsts0, states0, nextId0, bypass0) =
+          go rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
+      in
+        ( firsts0 <> bypassStates bypass0 firsts1
+        , states0 <> states1
+        , nextId0
+        , bypass0 && bypass1
+        )
+    KleeneStar rex0 ->
+      let
+        (firsts, states, nextId', _) = go rex0 nextId (firsts <> dests)
+      in
+        (firsts, states, nextId', True)
+    KleeneOpt rex0 ->
+      let
+        (firsts, states, nextId', _) = go rex0 nextId dests
+      in
+        (firsts, states, nextId', True)
+    KleenePlus rex0 ->
+      let
+        (firsts, states, nextId', bypass) = go rex0 nextId (firsts <> dests)
+      in
+        (firsts, states, nextId', bypass)
+    RegExam (OneOf chars)
+      | Set.null chars -> (IntSet.empty, [], nextId, False)
+      | otherwise ->
+          ( IntSet.singleton nextId
+          , [(nextId, ThompsonState (TokenClass (OneOf chars)) dests)]
+          , nextId + 1
+          , False
+          )
+    RegExam (NotOneOf chars catTest) ->
+      ( IntSet.singleton nextId
+      , [(nextId, ThompsonState (TokenClass (NotOneOf chars catTest)) dests)]
+      , nextId + 1
+      , False
+      )
+    RegExam (Alternate rex0 rex1) ->
+      let
+        (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
+        (firsts0, states0, nextId0, bypass0) = go rex0 nextId1 dests
+      in
+        ( firsts0 <> firsts1
+        , states0 <> states1
+        , nextId0
+        , bypass0 || bypass1
+        )
+
+compileThompsonTop
+  :: RegEx token
+  -> (IntSet, IntMap (ThompsonState token))
+compileThompsonTop rex =
+  (firsts <> bypassStates bypass finalStates, IntMap.fromList states)
+  where
+    finalStates = IntSet.singleton thompsonFinalState
+    (firsts, states, _, bypass) = compileThompson rex 1 finalStates
+
+matchThompson :: Categorized token => [token] -> RegEx token -> Bool
+matchThompson word rex = IntSet.member thompsonFinalState finalStates
+  where
+    (startStates, states) = compileThompsonTop rex
+    finalStates = foldl' step startStates word
+    step activeStates input = IntSet.foldl' advance IntSet.empty activeStates
+      where
+        advance nextStates stateId
+          | stateId == thompsonFinalState = nextStates
+          | otherwise = case IntMap.lookup stateId states of
+              Just (ThompsonState exam dests)
+                | tokenClass exam input -> nextStates <> dests
+              _ -> nextStates
+
 -- instances
 instance (Ord rule, NonTerminalSymbol rule)
   => BackusNaurForm (Bnf rule) where
@@ -215,6 +324,8 @@ instance (Categorized token, HasTrie token)
     (=~) word = δ . diffB word
 instance (Categorized token, HasTrie token)
   => Matching [token] (RegEx token) where
-    word =~ pattern = word =~ liftBnf0 pattern
+    word =~ pattern
+      | containsNonTerminal pattern = word =~ liftBnf0 pattern
+      | otherwise = matchThompson word pattern
 instance Matching s (APrism s t a b) where
   word =~ pattern = is pattern word
diff --git a/test/Properties/Kleene.hs b/test/Properties/Kleene.hs
index 7aa7ca3a..ab5f1c9d 100644
--- a/test/Properties/Kleene.hs
+++ b/test/Properties/Kleene.hs
@@ -2,6 +2,7 @@
 module Properties.Kleene (kleeneProperties) where
 
 import Control.Lens.Grammar
+import Data.Foldable (for_)
 import Test.Hspec
 import Test.Hspec.QuickCheck (prop)
 import Test.QuickCheck
@@ -95,6 +96,26 @@ kleeneProperties = do
     prop "notAsIn cat = tokenClass (notAsIn cat)" $
       \(cat :: GeneralCategory) ->
         (notAsIn cat :: RegEx Char) == tokenClass (notAsIn cat)
+    it "matching agrees with lifted Bnf on examples" $ do
+      let
+        cases =
+          [ ("", mempty :: RegEx Char)
+          , ("a", token 'a')
+          , ("b", token 'a')
+          , ("ab", token 'a' <> token 'b')
+          , ("a", token 'a' >|< token 'b')
+          , ("bbb", starK (token 'b'))
+          , ("bbb", plusK (token 'b'))
+          , ("", optK (token 'b'))
+          , ("x", oneOf "xyz")
+          , ("x", notOneOf "abc")
+          , ("A", asIn UppercaseLetter)
+          , ("a", notAsIn UppercaseLetter)
+          , ("abbb", token 'a' <> starK (token 'b'))
+          , ("cat", terminal "cat" >|< terminal "dog")
+          ]
+      for_ cases $ \(word, rex) ->
+        (word =~ rex) `shouldBe` (word =~ liftBnf0 rex)
   describe "BooleanAlgebra TokenClass" $ do
     it "trueB = anyToken" $
       (trueB :: TokenClass Char) `shouldBe` anyToken

From c928f94e976cd3f5f7063854e0b67eb1da1bd3a2 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Tue, 21 Apr 2026 20:10:20 -0700
Subject: [PATCH 02/31] mawr test

---
 src/Control/Lens/Grammar/BackusNaur.hs |   3 +-
 test/Main.hs                           | 116 +++++++++++++++++++++++++
 2 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/src/Control/Lens/Grammar/BackusNaur.hs b/src/Control/Lens/Grammar/BackusNaur.hs
index 5d61fa20..1d60d7b4 100644
--- a/src/Control/Lens/Grammar/BackusNaur.hs
+++ b/src/Control/Lens/Grammar/BackusNaur.hs
@@ -205,7 +205,8 @@ compileThompson
 compileThompson = go where
   go rex nextId dests = case rex of
     SeqEmpty -> (IntSet.empty, [], nextId, True)
-    NonTerminal _ -> (IntSet.empty, [], nextId, False)
+    NonTerminal _ ->
+      error "compileThompson: NonTerminal unsupported; route via diffB"
     Sequence rex0 rex1 ->
       let
         (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
diff --git a/test/Main.hs b/test/Main.hs
index 9398364f..4d371397 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -40,6 +40,7 @@ main = do
     describe "lenvecGrammar" $ for_ lenvecExamples $ testCtxGrammar True lenvecGrammar
     describe "chainGrammar" $ for_ chainExamples $ testCtxGrammar True chainGrammar
     describe "Parsector try rollback" tryRollbackTests
+    describe "Thompson matcher" thompsonMatcherTests
     describe "Kleene" kleeneProperties
     describe "meander" meanderProperties
 
@@ -58,6 +59,121 @@ tryRollbackTests = do
     parsecStream actual `shouldBe` ""
     parsecResult actual `shouldBe` (Nothing :: Maybe String)
 
+thompsonMatcherTests :: Spec
+thompsonMatcherTests = do
+  let
+    sandwich = terminal "a" <> starK (terminal "b") <> terminal "a"
+    evenA = starK (sandwich >|< terminal "b")
+    nullableSeq = optK (terminal "a") <> terminal "b"
+    nestedStar = starK (starK (terminal "a"))
+    onePlusA = plusK (terminal "a")
+    altThenTerm = (terminal "a" >|< terminal "b") <> terminal "c"
+    notXY = notOneOf ("xy" :: String) :: RegEx Char
+    abcClass = oneOf ("abc" :: String) :: RegEx Char
+    checks :: [(String, RegEx Char, [(String, Bool)])]
+    checks =
+      [ ( "paper sandwich language"
+        , sandwich
+        , [ ("aa", True)
+          , ("aba", True)
+          , ("abbba", True)
+          , ("", False)
+          , ("a", False)
+          , ("ab", False)
+          , ("baa", False)
+          ]
+        )
+      , ( "paper even-a language"
+        , evenA
+        , [ ("", True)
+          , ("b", True)
+          , ("bb", True)
+          , ("aa", True)
+          , ("abba", True)
+          , ("baab", True)
+          , ("abaaab", True)
+          , ("a", False)
+          , ("ababa", False)
+          ]
+        )
+      , ( "nullable sequence wiring"
+        , nullableSeq
+        , [ ("b", True)
+          , ("ab", True)
+          , ("", False)
+          , ("a", False)
+          ]
+        )
+      , ( "empty and empty-class handling"
+        , mempty
+        , [ ("", True)
+          , ("a", False)
+          ]
+        )
+      , ( "failing token class"
+        , zeroK
+        , [ ("", False)
+          , ("a", False)
+          ]
+        )
+      , ( "nested star (a*)*"
+        , nestedStar
+        , [ ("", True)
+          , ("a", True)
+          , ("aaaa", True)
+          , ("b", False)
+          , ("aab", False)
+          ]
+        )
+      , ( "kleene plus a+"
+        , onePlusA
+        , [ ("a", True)
+          , ("aaa", True)
+          , ("", False)
+          , ("ab", False)
+          , ("b", False)
+          ]
+        )
+      , ( "alternate then terminal (a|b)c"
+        , altThenTerm
+        , [ ("ac", True)
+          , ("bc", True)
+          , ("", False)
+          , ("c", False)
+          , ("abc", False)
+          , ("a", False)
+          ]
+        )
+      , ( "notOneOf \"xy\""
+        , notXY
+        , [ ("a", True)
+          , ("z", True)
+          , ("x", False)
+          , ("y", False)
+          , ("", False)
+          , ("ab", False)
+          ]
+        )
+      , ( "oneOf \"abc\""
+        , abcClass
+        , [ ("a", True)
+          , ("b", True)
+          , ("c", True)
+          , ("d", False)
+          , ("", False)
+          , ("ab", False)
+          ]
+        )
+      ]
+
+  for_ checks $ \(label, rex, cases) ->
+    for_ cases $ \(word, expected) ->
+      it (label <> " matches " <> show word) $ do
+        let thompsonMatch = word =~ rex
+        let derivativeMatch = word =~ liftBnf0 rex
+        thompsonMatch `shouldBe` expected
+        thompsonMatch `shouldBe` derivativeMatch
+
 doctests :: IO ()
 doctests = do
   let

From fb01a85fcb8a1ece04d51441fd299646ced8efa8 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 06:53:48 -0700
Subject: [PATCH 03/31] earley

---
 src/Control/Lens/Grammar/BackusNaur.hs | 234 ++++++++++++++++++++++++-
 test/Examples/Chain.hs                 |   2 +-
 test/Examples/Json.hs                  |  44 ++---
 test/Main.hs                           | 104 ++++++++++-
 4 files changed, 354 insertions(+), 30 deletions(-)

diff --git a/src/Control/Lens/Grammar/BackusNaur.hs b/src/Control/Lens/Grammar/BackusNaur.hs
index 1d60d7b4..bc465d7a 100644
--- a/src/Control/Lens/Grammar/BackusNaur.hs
+++ b/src/Control/Lens/Grammar/BackusNaur.hs
@@ -37,6 +37,8 @@ import qualified Data.IntMap.Strict as IntMap
 import Data.IntMap.Strict (IntMap)
 import qualified Data.IntSet as IntSet
 import Data.IntSet (IntSet)
+import qualified Data.Map.Strict as Map
+import Data.Map.Strict (Map)
 import Data.MemoTrie
 import qualified Data.Set as Set
 import Data.Set (Set)
@@ -281,6 +283,232 @@ matchThompson word rex = IntSet.member thompsonFinalState finalStates
                 | tokenClass exam input -> nextStates <> dests
               _ -> nextStates
 
+-- | A state in the Earley-extended Thompson transducer for a `Bnf`.
+-- @ESTerminal cls ds@ matches on a token class and transitions to @ds@.
+-- @ESNonterm name ds@ is a call point for rule @name@; after @name@
+-- completes, control flows to @ds@. @ESEmit name@ is the final state
+-- for rule @name@ and triggers completion during Earley closure.
+data EarleyState token
+  = ESTerminal (TokenClass token) IntSet
+  | ESNonterm String IntSet
+  | ESEmit String
+
+data EarleyTransducer token = EarleyTransducer
+  { etStates      :: IntMap (EarleyState token)
+  , etRuleEntries :: Map String (IntSet, Bool)
+  , etAcceptId    :: Int
+  , etStartStates :: IntSet
+  }
+
+earleyAcceptId :: Int
+earleyAcceptId = 0
+
+nullableRules :: Map String [RegEx token] -> Map String Bool
+nullableRules ruleMap = iter (Map.map (const False) ruleMap)
+  where
+  iter nm =
+    let nm' = Map.mapWithKey
+          (\n _ -> any (rexNullable nm) (Map.findWithDefault [] n ruleMap)) nm
+    in if nm == nm' then nm else iter nm'
+  rexNullable nm = \case
+    SeqEmpty -> True
+    NonTerminal n -> Map.findWithDefault False n nm
+    Sequence x y -> rexNullable nm x && rexNullable nm y
+    KleeneStar _ -> True
+    KleeneOpt _ -> True
+    KleenePlus x -> rexNullable nm x
+    RegExam (Alternate x y) -> rexNullable nm x || rexNullable nm y
+    RegExam (OneOf _) -> False
+    RegExam (NotOneOf _ _) -> False
+
+compileEarley
+  :: forall token. Bnf (RegEx token) -> EarleyTransducer token
+compileEarley (Bnf start rules) = EarleyTransducer
+  { etStates = IntMap.fromList allStates
+  , etRuleEntries = Map.fromList
+      [ ( n
+        , ( Map.findWithDefault IntSet.empty n firstsMap
+          , Map.findWithDefault False n nullMap
+          )
+        )
+      | n <- Map.keys ruleMap
+      ]
+  , etAcceptId = earleyAcceptId
+  , etStartStates = startStates
+  }
+  where
+    ruleMap :: Map String [RegEx token]
+    ruleMap = foldr
+      (\(n, r) -> Map.insertWith (++) n [r]) Map.empty (toList rules)
+    nullMap = nullableRules ruleMap
+    ruleNames = Map.keys ruleMap
+    (finalMap, nextIdAfterFinals) =
+      foldl' alloc (Map.empty, earleyAcceptId + 1) ruleNames
+      where alloc (m, i) n = (Map.insert n i m, i + 1)
+    finalStatesList = [(finalMap Map.! n, ESEmit n) | n <- ruleNames]
+    (rulesStatesList, firstsMap, nextIdAfterRules) =
+      foldl' compileRule ([], Map.empty, nextIdAfterFinals) (Map.toList ruleMap)
+      where
+        compileRule (sts, fm, nid) (name, prods) =
+          let finalId = finalMap Map.! name
+              (newSts, newFirsts, nid') =
+                foldl' compileProd ([], IntSet.empty, nid) prods
+              compileProd (s, fs, i) prod =
+                let (f, st, i', _) =
+                      goEarley nullMap prod i (IntSet.singleton finalId)
+                in (s <> st, fs <> f, i')
+          in (sts <> newSts, Map.insert name newFirsts fm, nid')
+    (startFirsts, startStatesRaw, _, startBypass) =
+      goEarley nullMap start nextIdAfterRules (IntSet.singleton earleyAcceptId)
+    startStates =
+      startFirsts <> bypassStates startBypass (IntSet.singleton earleyAcceptId)
+    allStates = finalStatesList <> rulesStatesList <> startStatesRaw
+
+goEarley
+  :: Map String Bool
+  -> RegEx token
+  -> Int
+  -> IntSet
+  -> (IntSet, [(Int, EarleyState token)], Int, Bool)
+goEarley nullMap = go
+  where
+  go rex nextId dests = case rex of
+    SeqEmpty -> (IntSet.empty, [], nextId, True)
+    NonTerminal name ->
+      ( IntSet.singleton nextId
+      , [(nextId, ESNonterm name dests)]
+      , nextId + 1
+      , Map.findWithDefault False name nullMap
+      )
+    Sequence rex0 rex1 ->
+      let
+        (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
+        (firsts0, states0, nextId0, bypass0) =
+          go rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
+      in
+        ( firsts0 <> bypassStates bypass0 firsts1
+        , states0 <> states1
+        , nextId0
+        , bypass0 && bypass1
+        )
+    KleeneStar rex0 ->
+      let
+        (firsts, states, nextId', _) = go rex0 nextId (firsts <> dests)
+      in
+        (firsts, states, nextId', True)
+    KleeneOpt rex0 ->
+      let
+        (firsts, states, nextId', _) = go rex0 nextId dests
+      in
+        (firsts, states, nextId', True)
+    KleenePlus rex0 ->
+      let
+        (firsts, states, nextId', bypass) = go rex0 nextId (firsts <> dests)
+      in
+        (firsts, states, nextId', bypass)
+    RegExam (OneOf chars)
+      | Set.null chars -> (IntSet.empty, [], nextId, False)
+      | otherwise ->
+          ( IntSet.singleton nextId
+          , [(nextId, ESTerminal (TokenClass (OneOf chars)) dests)]
+          , nextId + 1
+          , False
+          )
+    RegExam (NotOneOf chars catTest) ->
+      ( IntSet.singleton nextId
+      , [(nextId, ESTerminal (TokenClass (NotOneOf chars catTest)) dests)]
+      , nextId + 1
+      , False
+      )
+    RegExam (Alternate rex0 rex1) ->
+      let
+        (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
+        (firsts0, states0, nextId0, bypass0) = go rex0 nextId1 dests
+      in
+        ( firsts0 <> firsts1
+        , states0 <> states1
+        , nextId0
+        , bypass0 || bypass1
+        )
+
+matchEarley
+  :: Categorized token => [token] -> EarleyTransducer token -> Bool
+matchEarley word et = IntSet.member 0 acceptOrigins
+  where
+    initialE0 = IntMap.fromList
+      [ (s, IntSet.singleton 0) | s <- IntSet.toList (etStartStates et) ]
+    sets0 = IntMap.singleton 0 initialE0
+    sets0closed = closureAt et 0 sets0
+    (finalSets, n) = runInput 0 sets0closed word
+    runInput j ss [] = (ss, j)
+    runInput j ss (x : xs) =
+      let scanned = scanFrom et j x ss
+          ss' = IntMap.insert (j + 1) scanned ss
+          closed = closureAt et (j + 1) ss'
+      in runInput (j + 1) closed xs
+    en = IntMap.findWithDefault IntMap.empty n finalSets
+    acceptOrigins = IntMap.findWithDefault IntSet.empty (etAcceptId et) en
+
+scanFrom
+  :: Categorized token
+  => EarleyTransducer token -> Int -> token
+  -> IntMap (IntMap IntSet) -> IntMap IntSet
+scanFrom et j input ss = IntMap.foldrWithKey advance IntMap.empty e_j
+  where
+    e_j = IntMap.findWithDefault IntMap.empty j ss
+    advance s origs acc = case IntMap.lookup s (etStates et) of
+      Just (ESTerminal cls ds) | tokenClass cls input ->
+        IntSet.foldr
+          (\d -> IntMap.insertWith IntSet.union d origs) acc ds
+      _ -> acc
+
+closureAt
+  :: EarleyTransducer token -> Int
+  -> IntMap (IntMap IntSet) -> IntMap (IntMap IntSet)
+closureAt et j initialSets = loop initialWork initialSets
+  where
+    initialE = IntMap.findWithDefault IntMap.empty j initialSets
+    initialWork =
+      [ (s, i) | (s, os) <- IntMap.toList initialE, i <- IntSet.toList os ]
+    loop [] ss = ss
+    loop ((s, i) : rest) ss = case IntMap.lookup s (etStates et) of
+      Just (ESNonterm name ds) ->
+        let (firsts, isNull) = Map.findWithDefault
+              (IntSet.empty, False) name (etRuleEntries et)
+            predItems = [(f, j) | f <- IntSet.toList firsts]
+            nullItems =
+              if isNull then [(d, i) | d <- IntSet.toList ds] else []
+            (ss', new) = addEarleyItems j (predItems <> nullItems) ss
+        in loop (new <> rest) ss'
+      Just (ESEmit name) ->
+        let e_i = IntMap.findWithDefault IntMap.empty i ss
+            completions =
+              [ (d, i')
+              | (t, os) <- IntMap.toList e_i
+              , Just (ESNonterm n ds) <- [IntMap.lookup t (etStates et)]
+              , n == name
+              , i' <- IntSet.toList os
+              , d <- IntSet.toList ds
+              ]
+            (ss', new) = addEarleyItems j completions ss
+        in loop (new <> rest) ss'
+      _ -> loop rest ss
+
+addEarleyItems
+  :: Int -> [(Int, Int)] -> IntMap (IntMap IntSet)
+  -> (IntMap (IntMap IntSet), [(Int, Int)])
+addEarleyItems j items ss = foldl' ins (ss, []) items
+  where
+    ins (acc, new) (state, origin) =
+      let e_j = IntMap.findWithDefault IntMap.empty j acc
+          os = IntMap.findWithDefault IntSet.empty state e_j
+      in if IntSet.member origin os
+         then (acc, new)
+         else
+           let e_j' = IntMap.insert state (IntSet.insert origin os) e_j
+               acc' = IntMap.insert j e_j' acc
+           in (acc', (state, origin) : new)
+
 -- instances
 instance (Ord rule, NonTerminalSymbol rule)
   => BackusNaurForm (Bnf rule) where
@@ -320,10 +548,10 @@ instance (Ord rule, Monoid rule) => Monoid (Bnf rule) where
   mempty = liftBnf0 mempty
 instance (Ord rule, Semigroup rule) => Semigroup (Bnf rule) where
   (<>) = liftBnf2 (<>)
-instance (Categorized token, HasTrie token)
+instance Categorized token
   => Matching [token] (Bnf (RegEx token)) where
-    (=~) word = δ . diffB word
-instance (Categorized token, HasTrie token)
+    word =~ bnf = matchEarley word (compileEarley bnf)
+instance Categorized token
   => Matching [token] (RegEx token) where
     word =~ pattern
       | containsNonTerminal pattern = word =~ liftBnf0 pattern
diff --git a/test/Examples/Chain.hs b/test/Examples/Chain.hs
index 5d0f964e..1b54285a 100644
--- a/test/Examples/Chain.hs
+++ b/test/Examples/Chain.hs
@@ -16,7 +16,7 @@ data Chain
 
 makePrisms ''Chain
 
-chainGrammar :: CtxGrammar Char Chain
+chainGrammar :: Grammar Char Chain
 chainGrammar = ruleRec "chain" seqG
   where
     seqG chn = rule "seq" $
diff --git a/test/Examples/Json.hs b/test/Examples/Json.hs
index e3aec526..71305d6c 100644
--- a/test/Examples/Json.hs
+++ b/test/Examples/Json.hs
@@ -5,7 +5,7 @@ module Examples.Json
   ) where
 
 import Control.Applicative
-import Control.Lens
+import Control.Lens hiding (element)
 import Control.Lens.Grammar
 import qualified Data.Map.Strict as Map
 import Data.Map.Strict (Map)
@@ -24,50 +24,54 @@ data Json
 -- Generate prisms
 makePrisms ''Json
 
--- | JSON grammar following the McKeeman Form specification from json.org
+-- | JSON grammar following the McKeeman Form specification from json.org.
+-- The inner rules are mutually recursive: element ↔ value ↔ array ↔
+-- elements ↔ element, and element ↔ value ↔ object ↔ members ↔ member
+-- ↔ element. Only a rule bound via `ruleRec` produces a stub that
+-- breaks the cycle; a plain `rule` invocation forces its body, so
+-- every cyclic back-edge must instead use the `element` stub produced
+-- by the inner `ruleRec "element"` below.
 jsonGrammar :: Grammar Char Json
-jsonGrammar = ruleRec "json" elementG
+jsonGrammar = ruleRec "json" $ \_json ->
+  ruleRec "element" $ \element ->
+    ws >* valueG element *< ws
   where
-    -- element = ws value ws
-    elementG json = rule "element" $
-      ws >* valueG json *< ws
-
     -- value = object | array | string | number | "true" | "false" | "null"
-    valueG json = rule "value" $ choice
+    valueG element = rule "value" $ choice
       [ _JNull >? terminal "null"
       , _JBool . only True >? terminal "true"
       , _JBool . only False >? terminal "false"
       , _JNumber >? numberG
       , _JString >? stringG
-      , _JArray >? arrayG json
-      , _JObject >? objectG json
+      , _JArray >? arrayG element
+      , _JObject >? objectG element
       ]
 
     -- object = '{' ws '}' | '{' members '}'
-    objectG json = rule "object" $ choice
+    objectG element = rule "object" $ choice
       [ only Map.empty >?
           terminal "{" >* ws >* terminal "}"
       , iso Map.toList Map.fromList >~
-          terminal "{" >* membersG json *< terminal "}"
+          terminal "{" >* membersG element *< terminal "}"
       ]
 
     -- members = member | member ',' members
-    membersG json = rule "members" $
-      several1 (sepWith ",") (memberG json)
+    membersG element = rule "members" $
+      several1 (sepWith ",") (memberG element)
 
     -- member = ws string ws ':' element
-    memberG json = rule "member" $
-      ws >* stringG *< ws *< terminal ":" >*< elementG json
+    memberG element = rule "member" $
+      ws >* stringG *< ws *< terminal ":" >*< element
 
     -- array = '[' ws ']' | '[' elements ']'
-    arrayG json = rule "array" $ choice
+    arrayG element = rule "array" $ choice
       [ only [] >? terminal "[" >* ws >* terminal "]"
-      , terminal "[" >* elementsG json *< terminal "]"
+      , terminal "[" >* elementsG element *< terminal "]"
       ]
 
     -- elements = element | element ',' elements
-    elementsG json = rule "elements" $
-      several1 (sepWith ",") (elementG json)
+    elementsG element = rule "elements" $
+      several1 (sepWith ",") element
 
     -- string = '"' characters '"'
     stringG = rule "string" $
diff --git a/test/Main.hs b/test/Main.hs
index 4d371397..39c9f1cf 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -31,16 +31,17 @@ main = do
       describe "doctest" $
         it "should run haddock examples" doctests
     describe "regexGrammar" $ for_ regexExamples $ testGrammar False regexGrammar
-    describe "semverGrammar" $ for_ semverExamples $ testCtxGrammar True semverGrammar
+    describe "semverGrammar" $ for_ semverExamples $ testGrammar True semverGrammar
     describe "semverCtxGrammar" $ for_ semverExamples $ testCtxGrammar True semverCtxGrammar
     describe "arithGrammar" $ for_ arithExamples $ testGrammar True arithGrammar
     describe "jsonGrammar" $ for_ jsonExamples $ testCtxGrammar False jsonGrammar
-    describe "sexprGrammar" $ for_ sexprExamples $ testCtxGrammar True sexprGrammar
-    describe "lambdaGrammar" $ for_ lambdaExamples $ testCtxGrammar True lambdaGrammar
+    describe "sexprGrammar" $ for_ sexprExamples $ testGrammar True sexprGrammar
+    describe "lambdaGrammar" $ for_ lambdaExamples $ testGrammar True lambdaGrammar
     describe "lenvecGrammar" $ for_ lenvecExamples $ testCtxGrammar True lenvecGrammar
-    describe "chainGrammar" $ for_ chainExamples $ testCtxGrammar True chainGrammar
+    describe "chainGrammar" $ for_ chainExamples $ testGrammar True chainGrammar
     describe "Parsector try rollback" tryRollbackTests
     describe "Thompson matcher" thompsonMatcherTests
+    describe "Earley matcher" earleyMatcherTests
     describe "Kleene" kleeneProperties
     describe "meander" meanderProperties
 
@@ -170,9 +171,100 @@ thompsonMatcherTests = do
     for_ cases $ \(word, expected) ->
       it (label <> " matches " <> show word) $ do
         let thompsonMatch = word =~ rex
-        let derivativeMatch = word =~ liftBnf0 rex
+        let earleyMatch = word =~ liftBnf0 rex
         thompsonMatch `shouldBe` expected
-        thompsonMatch `shouldBe` derivativeMatch
+        thompsonMatch `shouldBe` earleyMatch
+
+earleyMatcherTests :: Spec
+earleyMatcherTests = do
+  let
+    parens :: Bnf (RegEx Char)
+    parens = ruleRec "S" $ \s ->
+      mempty >|< terminal "(" <> s <> terminal ")"
+    -- S = A B; A = "a"; B = "b"
+    ab :: Bnf (RegEx Char)
+    ab = rule "A" (terminal "a")
+      <> rule "B" (terminal "b")
+    -- Left-recursive: A = A 'x' | 'x'
+    leftRec :: Bnf (RegEx Char)
+    leftRec = ruleRec "A" $ \a ->
+      a <> terminal "x" >|< terminal "x"
+    -- Mutual recursion with nullable bridges.
+    -- S = A B; A = 'a'?; B = 'b'
+    mutualNull :: Bnf (RegEx Char)
+    mutualNull = rule "A" (optK (terminal "a"))
+      <> rule "B" (terminal "b")
+    -- Ambiguous palindromes over 'a','b':
+    -- P = 'a' P 'a' | 'b' P 'b' | 'a' | 'b' | ε
+    palindrome :: Bnf (RegEx Char)
+    palindrome = ruleRec "P" $ \p ->
+      terminal "a" <> p <> terminal "a"
+      >|< terminal "b" <> p <> terminal "b"
+      >|< terminal "a"
+      >|< terminal "b"
+      >|< mempty
+    checks :: [(String, Bnf (RegEx Char), [(String, Bool)])]
+    checks =
+      [ ( "balanced parens S = (S) | ε"
+        , parens
+        , [ ("", True)
+          , ("()", True)
+          , ("(())", True)
+          , ("((()))", True)
+          , ("(", False)
+          , (")", False)
+          , ("(()", False)
+          , ("())", False)
+          ]
+        )
+      , ( "two-rule concat A B"
+        , ab
+        , [ ("ab", True)
+          , ("", False)
+          , ("a", False)
+          , ("b", False)
+          , ("ba", False)
+          , ("abc", False)
+          ]
+        )
+      , ( "left recursion A = A x | x"
+        , leftRec
+        , [ ("x", True)
+          , ("xx", True)
+          , ("xxxx", True)
+          , ("", False)
+          , ("y", False)
+          , ("xy", False)
+          ]
+        )
+      , ( "nullable nonterminal A?B"
+        , mutualNull
+        , [ ("b", True)
+          , ("ab", True)
+          , ("", False)
+          , ("a", False)
+          , ("bb", False)
+          ]
+        )
+      , ( "ambiguous palindromes"
+        , palindrome
+        , [ ("", True)
+          , ("a", True)
+          , ("b", True)
+          , ("aa", True)
+          , ("aba", True)
+          , ("abba", True)
+          , ("abaaba", True)
+          , ("ab", False)
+          , ("abab", False)
+          , ("abca", False)
+          ]
+        )
+      ]
+  for_ checks $ \(label, bnf, cases) ->
+    for_ cases $ \(word, expected) ->
+      it (label <> " matches " <> show word) $
+        (word =~ bnf) `shouldBe` expected
 
 doctests :: IO ()
 doctests = do

From c071f0f38fcc27c898c9cf8a7e4f9b822d12a754 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 06:59:01 -0700
Subject: [PATCH 04/31] Update Json.hs

---
 test/Examples/Json.hs | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/test/Examples/Json.hs b/test/Examples/Json.hs
index 71305d6c..3cb39591 100644
--- a/test/Examples/Json.hs
+++ b/test/Examples/Json.hs
@@ -32,46 +32,47 @@ makePrisms ''Json
 -- every cyclic back-edge must instead use the `element` stub produced
 -- by the inner `ruleRec "element"` below.
 jsonGrammar :: Grammar Char Json
-jsonGrammar = ruleRec "json" $ \_json ->
-  ruleRec "element" $ \element ->
-    ws >* valueG element *< ws
+jsonGrammar = rule "json" elementG
   where
+    elementG = ruleRec "element" $ \json ->
+      ws >* valueG json *< ws
+
     -- value = object | array | string | number | "true" | "false" | "null"
-    valueG element = rule "value" $ choice
+    valueG json = rule "value" $ choice
       [ _JNull >? terminal "null"
       , _JBool . only True >? terminal "true"
       , _JBool . only False >? terminal "false"
       , _JNumber >? numberG
       , _JString >? stringG
-      , _JArray >? arrayG element
-      , _JObject >? objectG element
+      , _JArray >? arrayG json
+      , _JObject >? objectG json
       ]
 
     -- object = '{' ws '}' | '{' members '}'
-    objectG element = rule "object" $ choice
+    objectG json = rule "object" $ choice
       [ only Map.empty >?
           terminal "{" >* ws >* terminal "}"
       , iso Map.toList Map.fromList >~
-          terminal "{" >* membersG element *< terminal "}"
+          terminal "{" >* membersG json *< terminal "}"
       ]
 
     -- members = member | member ',' members
-    membersG element = rule "members" $
-      several1 (sepWith ",") (memberG element)
+    membersG json = rule "members" $
+      several1 (sepWith ",") (memberG json)
 
     -- member = ws string ws ':' element
-    memberG element = rule "member" $
-      ws >* stringG *< ws *< terminal ":" >*< element
+    memberG json = rule "member" $
+      ws >* stringG *< ws *< terminal ":" >*< json
 
     -- array = '[' ws ']' | '[' elements ']'
-    arrayG element = rule "array" $ choice
+    arrayG json = rule "array" $ choice
       [ only [] >? terminal "[" >* ws >* terminal "]"
-      , terminal "[" >* elementsG element *< terminal "]"
+      , terminal "[" >* elementsG json *< terminal "]"
       ]
 
     -- elements = element | element ',' elements
-    elementsG element = rule "elements" $
-      several1 (sepWith ",") element
+    elementsG json = rule "elements" $
+      several1 (sepWith ",") json
 
     -- string = '"' characters '"'
     stringG = rule "string" $

From a50c662e7faf7e71e891a79f3db3c3d467b452e1 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 06:59:24 -0700
Subject: [PATCH 05/31] Update Json.hs

---
 test/Examples/Json.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Examples/Json.hs b/test/Examples/Json.hs
index 3cb39591..eb1b24d6 100644
--- a/test/Examples/Json.hs
+++ b/test/Examples/Json.hs
@@ -5,7 +5,7 @@ module Examples.Json
   ) where
 
 import Control.Applicative
-import Control.Lens hiding (element)
+import Control.Lens
 import Control.Lens.Grammar
 import qualified Data.Map.Strict as Map
 import Data.Map.Strict (Map)

From ed89f88a6ed87593218153e2f8721659182392f0 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 07:44:24 -0700
Subject: [PATCH 06/31] cleanup

---
 src/Control/Lens/Grammar/BackusNaur.hs | 452 ++++++++++---------------
 test/Main.hs                           | 208 ------------
 2 files changed, 170 insertions(+), 490 deletions(-)

diff --git a/src/Control/Lens/Grammar/BackusNaur.hs b/src/Control/Lens/Grammar/BackusNaur.hs
index bc465d7a..c5868f31 100644
--- a/src/Control/Lens/Grammar/BackusNaur.hs
+++ b/src/Control/Lens/Grammar/BackusNaur.hs
@@ -177,155 +177,27 @@ rulesNamed nameX = foldl' (flip inserter) Set.empty where
   inserter (nameY,y) =
     if nameX == nameY then Set.insert y else id
 
-data ThompsonState token = ThompsonState (TokenClass token) IntSet
-
-thompsonFinalState :: Int
-thompsonFinalState = 0
-
-bypassStates :: Bool -> IntSet -> IntSet
-bypassStates True = id
-bypassStates False = const IntSet.empty
-
-containsNonTerminal :: RegEx token -> Bool
-containsNonTerminal = \
-  case
-    NonTerminal _ -> True
-    Sequence rex0 rex1 ->
-      containsNonTerminal rex0 || containsNonTerminal rex1
-    KleeneStar rex -> containsNonTerminal rex
-    KleeneOpt rex -> containsNonTerminal rex
-    KleenePlus rex -> containsNonTerminal rex
-    RegExam (Alternate rex0 rex1) ->
-      containsNonTerminal rex0 || containsNonTerminal rex1
-    _ -> False
-
-compileThompson
-  :: RegEx token
-  -> Int
-  -> IntSet
-  -> (IntSet, [(Int, ThompsonState token)], Int, Bool)
-compileThompson = go where
-  go rex nextId dests = case rex of
-    SeqEmpty -> (IntSet.empty, [], nextId, True)
-    NonTerminal _ ->
-      error "compileThompson: NonTerminal unsupported; route via diffB"
-    Sequence rex0 rex1 ->
-      let
-        (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
-        (firsts0, states0, nextId0, bypass0) =
-          go rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
-      in
-        ( firsts0 <> bypassStates bypass0 firsts1
-        , states0 <> states1
-        , nextId0
-        , bypass0 && bypass1
-        )
-    KleeneStar rex0 ->
-      let
-        (firsts, states, nextId', _) = go rex0 nextId (firsts <> dests)
-      in
-        (firsts, states, nextId', True)
-    KleeneOpt rex0 ->
-      let
-        (firsts, states, nextId', _) = go rex0 nextId dests
-      in
-        (firsts, states, nextId', True)
-    KleenePlus rex0 ->
-      let
-        (firsts, states, nextId', bypass) = go rex0 nextId (firsts <> dests)
-      in
-        (firsts, states, nextId', bypass)
-    RegExam (OneOf chars)
-      | Set.null chars -> (IntSet.empty, [], nextId, False)
-      | otherwise ->
-          ( IntSet.singleton nextId
-          , [(nextId, ThompsonState (TokenClass (OneOf chars)) dests)]
-          , nextId + 1
-          , False
-          )
-    RegExam (NotOneOf chars catTest) ->
-      ( IntSet.singleton nextId
-      , [(nextId, ThompsonState (TokenClass (NotOneOf chars catTest)) dests)]
-      , nextId + 1
-      , False
-      )
-    RegExam (Alternate rex0 rex1) ->
-      let
-        (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
-        (firsts0, states0, nextId0, bypass0) = go rex0 nextId1 dests
-      in
-        ( firsts0 <> firsts1
-        , states0 <> states1
-        , nextId0
-        , bypass0 || bypass1
-        )
-
-compileThompsonTop
-  :: RegEx token
-  -> (IntSet, IntMap (ThompsonState token))
-compileThompsonTop rex =
-  (firsts <> bypassStates bypass finalStates, IntMap.fromList states)
-  where
-    finalStates = IntSet.singleton thompsonFinalState
-    (firsts, states, _, bypass) = compileThompson rex 1 finalStates
-
-matchThompson :: Categorized token => [token] -> RegEx token -> Bool
-matchThompson word rex = IntSet.member thompsonFinalState finalStates
-  where
-    (startStates, states) = compileThompsonTop rex
-    finalStates = foldl' step startStates word
-    step activeStates input = IntSet.foldl' advance IntSet.empty activeStates
-      where
-        advance nextStates stateId
-          | stateId == thompsonFinalState = nextStates
-          | otherwise = case IntMap.lookup stateId states of
-              Just (ThompsonState exam dests)
-                | tokenClass exam input -> nextStates <> dests
-              _ -> nextStates
-
 -- | A state in the Earley-extended Thompson transducer for a `Bnf`.
--- @ESTerminal cls ds@ matches on a token class and transitions to @ds@.
--- @ESNonterm name ds@ is a call point for rule @name@; after @name@
--- completes, control flows to @ds@. @ESEmit name@ is the final state
+-- @EarleyTerminal cls ds@ matches on a token class and transitions to @ds@.
+-- @EarleyNonTerminal name ds@ is a call point for rule @name@; after @name@
+-- completes, control flows to @ds@. @EarleyEmit name@ is the final state
 -- for rule @name@ and triggers completion during Earley closure.
 data EarleyState token
-  = ESTerminal (TokenClass token) IntSet
-  | ESNonterm String IntSet
-  | ESEmit String
+  = EarleyTerminal (TokenClass token) IntSet
+  | EarleyNonTerminal String IntSet
+  | EarleyEmit String
 
 data EarleyTransducer token = EarleyTransducer
-  { etStates      :: IntMap (EarleyState token)
-  , etRuleEntries :: Map String (IntSet, Bool)
-  , etAcceptId    :: Int
-  , etStartStates :: IntSet
+  { earleyStates :: IntMap (EarleyState token)
+  , earleyRules :: Map String (IntSet, Bool)
+  , earleyAcceptId :: Int
+  , earleyStartStates :: IntSet
   }
 
-earleyAcceptId :: Int
-earleyAcceptId = 0
-
-nullableRules :: Map String [RegEx token] -> Map String Bool
-nullableRules ruleMap = iter (Map.map (const False) ruleMap)
-  where
-  iter nm =
-    let nm' = Map.mapWithKey
-          (\n _ -> any (rexNullable nm) (Map.findWithDefault [] n ruleMap)) nm
-    in if nm == nm' then nm else iter nm'
-  rexNullable nm = \case
-    SeqEmpty -> True
-    NonTerminal n -> Map.findWithDefault False n nm
-    Sequence x y -> rexNullable nm x && rexNullable nm y
-    KleeneStar _ -> True
-    KleeneOpt _ -> True
-    KleenePlus x -> rexNullable nm x
-    RegExam (Alternate x y) -> rexNullable nm x || rexNullable nm y
-    RegExam (OneOf _) -> False
-    RegExam (NotOneOf _ _) -> False
-
-compileEarley
-  :: forall token. Bnf (RegEx token) -> EarleyTransducer token
+compileEarley :: Bnf (RegEx token) -> EarleyTransducer token
 compileEarley (Bnf start rules) = EarleyTransducer
-  { etStates = IntMap.fromList allStates
-  , etRuleEntries = Map.fromList
+  { earleyStates = IntMap.fromList allStates
+  , earleyRules = Map.fromList
       [ ( n
         , ( Map.findWithDefault IntSet.empty n firstsMap
           , Map.findWithDefault False n nullMap
@@ -333,19 +205,43 @@ compileEarley (Bnf start rules) = EarleyTransducer
         )
       | n <- Map.keys ruleMap
       ]
-  , etAcceptId = earleyAcceptId
-  , etStartStates = startStates
+  , earleyAcceptId = earleyAcceptId0
+  , earleyStartStates = startStates
   }
+
   where
-    ruleMap :: Map String [RegEx token]
+
     ruleMap = foldr
       (\(n, r) -> Map.insertWith (++) n [r]) Map.empty (toList rules)
-    nullMap = nullableRules ruleMap
+
+    rexNullable nm = \case
+      SeqEmpty -> True
+      NonTerminal n -> Map.findWithDefault False n nm
+      Sequence x y -> rexNullable nm x && rexNullable nm y
+      KleeneStar _ -> True
+      KleeneOpt _ -> True
+      KleenePlus x -> rexNullable nm x
+      RegExam (Alternate x y) -> rexNullable nm x || rexNullable nm y
+      RegExam (OneOf _) -> False
+      RegExam (NotOneOf _ _) -> False
+
+    iterNull nm =
+      let nm' = Map.mapWithKey
+            (\n _ -> any (rexNullable nm) (Map.findWithDefault [] n ruleMap)) nm
+      in if nm == nm' then nm else iterNull nm'
+
+    nullMap = iterNull (Map.map (const False) ruleMap)
+
     ruleNames = Map.keys ruleMap
+
+    earleyAcceptId0 = 0
+
     (finalMap, nextIdAfterFinals) =
-      foldl' alloc (Map.empty, earleyAcceptId + 1) ruleNames
+      foldl' alloc (Map.empty, earleyAcceptId0 + 1) ruleNames
       where alloc (m, i) n = (Map.insert n i m, i + 1)
-    finalStatesList = [(finalMap Map.! n, ESEmit n) | n <- ruleNames]
+
+    finalStatesList = [(finalMap Map.! n, EarleyEmit n) | n <- ruleNames]
+
     (rulesStatesList, firstsMap, nextIdAfterRules) =
       foldl' compileRule ([], Map.empty, nextIdAfterFinals) (Map.toList ruleMap)
       where
@@ -355,159 +251,153 @@ compileEarley (Bnf start rules) = EarleyTransducer
                 foldl' compileProd ([], IntSet.empty, nid) prods
               compileProd (s, fs, i) prod =
                 let (f, st, i', _) =
-                      goEarley nullMap prod i (IntSet.singleton finalId)
+                      goEarley prod i (IntSet.singleton finalId)
                 in (s <> st, fs <> f, i')
           in (sts <> newSts, Map.insert name newFirsts fm, nid')
+
     (startFirsts, startStatesRaw, _, startBypass) =
-      goEarley nullMap start nextIdAfterRules (IntSet.singleton earleyAcceptId)
+      goEarley start nextIdAfterRules (IntSet.singleton earleyAcceptId0)
+
     startStates =
-      startFirsts <> bypassStates startBypass (IntSet.singleton earleyAcceptId)
+      startFirsts <> bypassStates startBypass (IntSet.singleton earleyAcceptId0)
+
     allStates = finalStatesList <> rulesStatesList <> startStatesRaw
 
-goEarley
-  :: Map String Bool
-  -> RegEx token
-  -> Int
-  -> IntSet
-  -> (IntSet, [(Int, EarleyState token)], Int, Bool)
-goEarley nullMap = go
-  where
-  go rex nextId dests = case rex of
-    SeqEmpty -> (IntSet.empty, [], nextId, True)
-    NonTerminal name ->
-      ( IntSet.singleton nextId
-      , [(nextId, ESNonterm name dests)]
-      , nextId + 1
-      , Map.findWithDefault False name nullMap
-      )
-    Sequence rex0 rex1 ->
-      let
-        (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
-        (firsts0, states0, nextId0, bypass0) =
-          go rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
-      in
-        ( firsts0 <> bypassStates bypass0 firsts1
-        , states0 <> states1
-        , nextId0
-        , bypass0 && bypass1
-        )
-    KleeneStar rex0 ->
-      let
-        (firsts, states, nextId', _) = go rex0 nextId (firsts <> dests)
-      in
-        (firsts, states, nextId', True)
-    KleeneOpt rex0 ->
-      let
-        (firsts, states, nextId', _) = go rex0 nextId dests
-      in
-        (firsts, states, nextId', True)
-    KleenePlus rex0 ->
-      let
-        (firsts, states, nextId', bypass) = go rex0 nextId (firsts <> dests)
-      in
-        (firsts, states, nextId', bypass)
-    RegExam (OneOf chars)
-      | Set.null chars -> (IntSet.empty, [], nextId, False)
-      | otherwise ->
+    bypassStates True = id
+    bypassStates False = const IntSet.empty
+
+    goEarley rex nextId dests = case rex of
+        SeqEmpty -> (IntSet.empty, [], nextId, True)
+        NonTerminal name ->
           ( IntSet.singleton nextId
-          , [(nextId, ESTerminal (TokenClass (OneOf chars)) dests)]
+          , [(nextId, EarleyNonTerminal name dests)]
+          , nextId + 1
+          , Map.findWithDefault False name nullMap
+          )
+        Sequence rex0 rex1 ->
+          let
+            (firsts1, states1, nextId1, bypass1) = goEarley rex1 nextId dests
+            (firsts0, states0, nextId0, bypass0) =
+              goEarley rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
+          in
+            ( firsts0 <> bypassStates bypass0 firsts1
+            , states0 <> states1
+            , nextId0
+            , bypass0 && bypass1
+            )
+        KleeneStar rex0 ->
+          let
+            (firsts, states, nextId', _) = goEarley rex0 nextId (firsts <> dests)
+          in
+            (firsts, states, nextId', True)
+        KleeneOpt rex0 ->
+          let
+            (firsts, states, nextId', _) = goEarley rex0 nextId dests
+          in
+            (firsts, states, nextId', True)
+        KleenePlus rex0 ->
+          let
+            (firsts, states, nextId', bypass) = goEarley rex0 nextId (firsts <> dests)
+          in
+            (firsts, states, nextId', bypass)
+        RegExam (OneOf chars)
+          | Set.null chars -> (IntSet.empty, [], nextId, False)
+          | otherwise ->
+              ( IntSet.singleton nextId
+              , [(nextId, EarleyTerminal (TokenClass (OneOf chars)) dests)]
+              , nextId + 1
+              , False
+              )
+        RegExam (NotOneOf chars catTest) ->
+          ( IntSet.singleton nextId
+          , [(nextId, EarleyTerminal (TokenClass (NotOneOf chars catTest)) dests)]
           , nextId + 1
           , False
           )
-    RegExam (NotOneOf chars catTest) ->
-      ( IntSet.singleton nextId
-      , [(nextId, ESTerminal (TokenClass (NotOneOf chars catTest)) dests)]
-      , nextId + 1
-      , False
-      )
-    RegExam (Alternate rex0 rex1) ->
-      let
-        (firsts1, states1, nextId1, bypass1) = go rex1 nextId dests
-        (firsts0, states0, nextId0, bypass0) = go rex0 nextId1 dests
-      in
-        ( firsts0 <> firsts1
-        , states0 <> states1
-        , nextId0
-        , bypass0 || bypass1
-        )
-
-matchEarley
-  :: Categorized token => [token] -> EarleyTransducer token -> Bool
+        RegExam (Alternate rex0 rex1) ->
+          let
+            (firsts1, states1, nextId1, bypass1) = goEarley rex1 nextId dests
+            (firsts0, states0, nextId0, bypass0) = goEarley rex0 nextId1 dests
+          in
+            ( firsts0 <> firsts1
+            , states0 <> states1
+            , nextId0
+            , bypass0 || bypass1
+            )
+
+matchEarley :: Categorized token => [token] -> EarleyTransducer token -> Bool
 matchEarley word et = IntSet.member 0 acceptOrigins
   where
+
     initialE0 = IntMap.fromList
-      [ (s, IntSet.singleton 0) | s <- IntSet.toList (etStartStates et) ]
+      [ (s, IntSet.singleton 0) | s <- IntSet.toList (earleyStartStates et) ]
+
     sets0 = IntMap.singleton 0 initialE0
-    sets0closed = closureAt et 0 sets0
+
+    sets0closed = closureAt 0 sets0
+
     (finalSets, n) = runInput 0 sets0closed word
+
     runInput j ss [] = (ss, j)
     runInput j ss (x : xs) =
-      let scanned = scanFrom et j x ss
+      let scanned = scanFrom j x ss
           ss' = IntMap.insert (j + 1) scanned ss
-          closed = closureAt et (j + 1) ss'
+          closed = closureAt (j + 1) ss'
       in runInput (j + 1) closed xs
+
     en = IntMap.findWithDefault IntMap.empty n finalSets
-    acceptOrigins = IntMap.findWithDefault IntSet.empty (etAcceptId et) en
 
-scanFrom
-  :: Categorized token
-  => EarleyTransducer token -> Int -> token
-  -> IntMap (IntMap IntSet) -> IntMap IntSet
-scanFrom et j input ss = IntMap.foldrWithKey advance IntMap.empty e_j
-  where
-    e_j = IntMap.findWithDefault IntMap.empty j ss
-    advance s origs acc = case IntMap.lookup s (etStates et) of
-      Just (ESTerminal cls ds) | tokenClass cls input ->
-        IntSet.foldr
-          (\d -> IntMap.insertWith IntSet.union d origs) acc ds
-      _ -> acc
-
-closureAt
-  :: EarleyTransducer token -> Int
-  -> IntMap (IntMap IntSet) -> IntMap (IntMap IntSet)
-closureAt et j initialSets = loop initialWork initialSets
-  where
-    initialE = IntMap.findWithDefault IntMap.empty j initialSets
-    initialWork =
-      [ (s, i) | (s, os) <- IntMap.toList initialE, i <- IntSet.toList os ]
-    loop [] ss = ss
-    loop ((s, i) : rest) ss = case IntMap.lookup s (etStates et) of
-      Just (ESNonterm name ds) ->
-        let (firsts, isNull) = Map.findWithDefault
-              (IntSet.empty, False) name (etRuleEntries et)
-            predItems = [(f, j) | f <- IntSet.toList firsts]
-            nullItems =
-              if isNull then [(d, i) | d <- IntSet.toList ds] else []
-            (ss', new) = addEarleyItems j (predItems <> nullItems) ss
-        in loop (new <> rest) ss'
-      Just (ESEmit name) ->
-        let e_i = IntMap.findWithDefault IntMap.empty i ss
-            completions =
-              [ (d, i')
-              | (t, os) <- IntMap.toList e_i
-              , Just (ESNonterm n ds) <- [IntMap.lookup t (etStates et)]
-              , n == name
-              , i' <- IntSet.toList os
-              , d <- IntSet.toList ds
-              ]
-            (ss', new) = addEarleyItems j completions ss
-        in loop (new <> rest) ss'
-      _ -> loop rest ss
-
-addEarleyItems
-  :: Int -> [(Int, Int)] -> IntMap (IntMap IntSet)
-  -> (IntMap (IntMap IntSet), [(Int, Int)])
-addEarleyItems j items ss = foldl' ins (ss, []) items
-  where
-    ins (acc, new) (state, origin) =
-      let e_j = IntMap.findWithDefault IntMap.empty j acc
-          os = IntMap.findWithDefault IntSet.empty state e_j
-      in if IntSet.member origin os
-         then (acc, new)
-         else
-           let e_j' = IntMap.insert state (IntSet.insert origin os) e_j
-               acc' = IntMap.insert j e_j' acc
-           in (acc', (state, origin) : new)
+    acceptOrigins = IntMap.findWithDefault IntSet.empty (earleyAcceptId et) en
+
+    scanFrom j input ss = IntMap.foldrWithKey advance IntMap.empty e_j
+      where
+        e_j = IntMap.findWithDefault IntMap.empty j ss
+        advance s origs acc = case IntMap.lookup s (earleyStates et) of
+          Just (EarleyTerminal cls ds) | tokenClass cls input ->
+            IntSet.foldr
+              (\d -> IntMap.insertWith IntSet.union d origs) acc ds
+          _ -> acc
+
+    closureAt j initialSets = loop initialWork initialSets
+      where
+        initialE = IntMap.findWithDefault IntMap.empty j initialSets
+        initialWork =
+          [ (s, i) | (s, os) <- IntMap.toList initialE, i <- IntSet.toList os ]
+        loop [] ss = ss
+        loop ((s, i) : rest) ss = case IntMap.lookup s (earleyStates et) of
+          Just (EarleyNonTerminal name ds) ->
+            let (firsts, isNull) = Map.findWithDefault
+                  (IntSet.empty, False) name (earleyRules et)
+                predItems = [(f, j) | f <- IntSet.toList firsts]
+                nullItems =
+                  if isNull then [(d, i) | d <- IntSet.toList ds] else []
+                (ss', new) = addEarleyItems j (predItems <> nullItems) ss
+            in loop (new <> rest) ss'
+          Just (EarleyEmit name) ->
+            let e_i = IntMap.findWithDefault IntMap.empty i ss
+                completions =
+                  [ (d, i')
+                  | (t, os) <- IntMap.toList e_i
+                  , Just (EarleyNonTerminal n' ds) <- [IntMap.lookup t (earleyStates et)]
+                  , n' == name
+                  , i' <- IntSet.toList os
+                  , d <- IntSet.toList ds
+                  ]
+                (ss', new) = addEarleyItems j completions ss
+            in loop (new <> rest) ss'
+          _ -> loop rest ss
+
+    addEarleyItems j items ss = foldl' ins (ss, []) items
+      where
+        ins (acc, new) (state, origin) =
+          let e_j = IntMap.findWithDefault IntMap.empty j acc
+              os = IntMap.findWithDefault IntSet.empty state e_j
+          in if IntSet.member origin os
+            then (acc, new)
+            else
+              let e_j' = IntMap.insert state (IntSet.insert origin os) e_j
+                  acc' = IntMap.insert j e_j' acc
+              in (acc', (state, origin) : new)
 
 -- instances
 instance (Ord rule, NonTerminalSymbol rule)
@@ -553,8 +443,6 @@ instance Categorized token
     word =~ bnf = matchEarley word (compileEarley bnf)
 instance Categorized token
   => Matching [token] (RegEx token) where
-    word =~ pattern
-      | containsNonTerminal pattern = word =~ liftBnf0 pattern
-      | otherwise = matchThompson word pattern
+    word =~ pattern = word =~ liftBnf0 pattern
 instance Matching s (APrism s t a b) where
   word =~ pattern = is pattern word
diff --git a/test/Main.hs b/test/Main.hs
index 39c9f1cf..140b217a 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -40,8 +40,6 @@ main = do
     describe "lenvecGrammar" $ for_ lenvecExamples $ testCtxGrammar True lenvecGrammar
     describe "chainGrammar" $ for_ chainExamples $ testGrammar True chainGrammar
     describe "Parsector try rollback" tryRollbackTests
-    describe "Thompson matcher" thompsonMatcherTests
-    describe "Earley matcher" earleyMatcherTests
     describe "Kleene" kleeneProperties
     describe "meander" meanderProperties
 
@@ -60,212 +58,6 @@ tryRollbackTests = do
     parsecStream actual `shouldBe` ""
     parsecResult actual `shouldBe` (Nothing :: Maybe String)
 
-thompsonMatcherTests :: Spec
-thompsonMatcherTests = do
-  let
-    sandwich = terminal "a" <> starK (terminal "b") <> terminal "a"
-    evenA = starK (sandwich >|< terminal "b")
-    nullableSeq = optK (terminal "a") <> terminal "b"
-    nestedStar = starK (starK (terminal "a"))
-    onePlusA = plusK (terminal "a")
-    altThenTerm = (terminal "a" >|< terminal "b") <> terminal "c"
-    notXY = notOneOf ("xy" :: String) :: RegEx Char
-    abcClass = oneOf ("abc" :: String) :: RegEx Char
-    checks :: [(String, RegEx Char, [(String, Bool)])]
-    checks =
-      [ ( "paper sandwich language"
-        , sandwich
-        , [ ("aa", True)
-          , ("aba", True)
-          , ("abbba", True)
-          , ("", False)
-          , ("a", False)
-          , ("ab", False)
-          , ("baa", False)
-          ]
-        )
-      , ( "paper even-a language"
-        , evenA
-        , [ ("", True)
-          , ("b", True)
-          , ("bb", True)
-          , ("aa", True)
-          , ("abba", True)
-          , ("baab", True)
-          , ("abaaab", True)
-          , ("a", False)
-          , ("ababa", False)
-          ]
-        )
-      , ( "nullable sequence wiring"
-        , nullableSeq
-        , [ ("b", True)
-          , ("ab", True)
-          , ("", False)
-          , ("a", False)
-          ]
-        )
-      , ( "empty and empty-class handling"
-        , mempty
-        , [ ("", True)
-          , ("a", False)
-          ]
-        )
-      , ( "failing token class"
-        , zeroK
-        , [ ("", False)
-          , ("a", False)
-          ]
-        )
-      , ( "nested star (a*)*"
-        , nestedStar
-        , [ ("", True)
-          , ("a", True)
-          , ("aaaa", True)
-          , ("b", False)
-          , ("aab", False)
-          ]
-        )
-      , ( "kleene plus a+"
-        , onePlusA
-        , [ ("a", True)
-          , ("aaa", True)
-          , ("", False)
-          , ("ab", False)
-          , ("b", False)
-          ]
-        )
-      , ( "alternate then terminal (a|b)c"
-        , altThenTerm
-        , [ ("ac", True)
-          , ("bc", True)
-          , ("", False)
-          , ("c", False)
-          , ("abc", False)
-          , ("a", False)
-          ]
-        )
-      , ( "notOneOf \"xy\""
-        , notXY
-        , [ ("a", True)
-          , ("z", True)
-          , ("x", False)
-          , ("y", False)
-          , ("", False)
-          , ("ab", False)
-          ]
-        )
-      , ( "oneOf \"abc\""
-        , abcClass
-        , [ ("a", True)
-          , ("b", True)
-          , ("c", True)
-          , ("d", False)
-          , ("", False)
-          , ("ab", False)
-          ]
-        )
-      ]
-
-  for_ checks $ \(label, rex, cases) ->
-    for_ cases $ \(word, expected) ->
-      it (label <> " matches " <> show word) $ do
-        let thompsonMatch = word =~ rex
-        let earleyMatch = word =~ liftBnf0 rex
-        thompsonMatch `shouldBe` expected
-        thompsonMatch `shouldBe` earleyMatch
-
-earleyMatcherTests :: Spec
-earleyMatcherTests = do
-  let
-    parens :: Bnf (RegEx Char)
-    parens = ruleRec "S" $ \s ->
-      mempty >|< terminal "(" <> s <> terminal ")"
-    -- S = A B; A = "a"; B = "b"
-    ab :: Bnf (RegEx Char)
-    ab = rule "A" (terminal "a")
-      <> rule "B" (terminal "b")
-    -- Left-recursive: A = A 'x' | 'x'
-    leftRec :: Bnf (RegEx Char)
-    leftRec = ruleRec "A" $ \a ->
-      a <> terminal "x" >|< terminal "x"
-    -- Mutual recursion with nullable bridges.
-    -- S = A B; A = 'a'?; B = 'b'
-    mutualNull :: Bnf (RegEx Char)
-    mutualNull = rule "A" (optK (terminal "a"))
-      <> rule "B" (terminal "b")
-    -- Ambiguous palindromes over 'a','b':
-    -- P = 'a' P 'a' | 'b' P 'b' | 'a' | 'b' | ε
-    palindrome :: Bnf (RegEx Char)
-    palindrome = ruleRec "P" $ \p ->
-      terminal "a" <> p <> terminal "a"
-      >|< terminal "b" <> p <> terminal "b"
-      >|< terminal "a"
-      >|< terminal "b"
-      >|< mempty
-    checks :: [(String, Bnf (RegEx Char), [(String, Bool)])]
-    checks =
-      [ ( "balanced parens S = (S) | ε"
-        , parens
-        , [ ("", True)
-          , ("()", True)
-          , ("(())", True)
-          , ("((()))", True)
-          , ("(", False)
-          , (")", False)
-          , ("(()", False)
-          , ("())", False)
-          ]
-        )
-      , ( "two-rule concat A B"
-        , ab
-        , [ ("ab", True)
-          , ("", False)
-          , ("a", False)
-          , ("b", False)
-          , ("ba", False)
-          , ("abc", False)
-          ]
-        )
-      , ( "left recursion A = A x | x"
-        , leftRec
-        , [ ("x", True)
-          , ("xx", True)
-          , ("xxxx", True)
-          , ("", False)
-          , ("y", False)
-          , ("xy", False)
-          ]
-        )
-      , ( "nullable nonterminal A?B"
-        , mutualNull
-        , [ ("b", True)
-          , ("ab", True)
-          , ("", False)
-          , ("a", False)
-          , ("bb", False)
-          ]
-        )
-      , ( "ambiguous palindromes"
-        , palindrome
-        , [ ("", True)
-          , ("a", True)
-          , ("b", True)
-          , ("aa", True)
-          , ("aba", True)
-          , ("abba", True)
-          , ("abaaba", True)
-          , ("ab", False)
-          , ("abab", False)
-          , ("abca", False)
-          ]
-        )
-      ]
-  for_ checks $ \(label, bnf, cases) ->
-    for_ cases $ \(word, expected) ->
-      it (label <> " matches " <> show word) $
-        (word =~ bnf) `shouldBe` expected
-
 doctests :: IO ()
 doctests = do
   let

From 3495538b71d3ab31cb9241a8f04e2357996a7093 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 10:16:55 -0700
Subject: [PATCH 07/31] Update Main.hs

---
 test/Main.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Main.hs b/test/Main.hs
index 140b217a..ae792432 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -34,7 +34,7 @@ main = do
     describe "semverGrammar" $ for_ semverExamples $ testGrammar True semverGrammar
     describe "semverCtxGrammar" $ for_ semverExamples $ testCtxGrammar True semverCtxGrammar
     describe "arithGrammar" $ for_ arithExamples $ testGrammar True arithGrammar
-    describe "jsonGrammar" $ for_ jsonExamples $ testCtxGrammar False jsonGrammar
+    describe "jsonGrammar" $ for_ jsonExamples $ testGrammar False jsonGrammar
     describe "sexprGrammar" $ for_ sexprExamples $ testGrammar True sexprGrammar
     describe "lambdaGrammar" $ for_ lambdaExamples $ testGrammar True lambdaGrammar
     describe "lenvecGrammar" $ for_ lenvecExamples $ testCtxGrammar True lenvecGrammar

From d97009f117097b94053fb08c84886c58259241e3 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 16:21:32 -0700
Subject: [PATCH 08/31] matching & generation via Thompson-Earley

---
 distributors.cabal                     |   2 +
 package.yaml                           |   1 +
 src/Control/Lens/Grammar.hs            |   6 +
 src/Control/Lens/Grammar/BackusNaur.hs | 244 -----------------
 src/Control/Lens/Grammar/Kleene.hs     |  11 +
 src/Control/Lens/Grammar/Matching.hs   | 353 +++++++++++++++++++++++++
 src/Control/Lens/Grammar/Token.hs      |  11 +
 src/Data/Profunctor/Grammar.hs         |   1 +
 test/Main.hs                           |  42 +--
 9 files changed, 412 insertions(+), 259 deletions(-)
 create mode 100644 src/Control/Lens/Grammar/Matching.hs

diff --git a/distributors.cabal b/distributors.cabal
index 546551fa..ed7a02ce 100644
--- a/distributors.cabal
+++ b/distributors.cabal
@@ -36,6 +36,7 @@ library
       Control.Lens.Grammar.Internal.NestedPrismTH
       Control.Lens.Grammar.Internal.Orphanage
       Control.Lens.Grammar.Kleene
+      Control.Lens.Grammar.Matching
       Control.Lens.Grammar.Symbol
       Control.Lens.Grammar.Token
       Control.Lens.Grate
@@ -98,6 +99,7 @@ library
   ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
   build-depends:
       MemoTrie >=0.6 && <1
+    , QuickCheck >=2.14 && <3
     , adjunctions >=4.4 && <5
     , base >=4.15 && <5
     , bifunctors >=5.5 && <6
diff --git a/package.yaml b/package.yaml
index 9f1c9a07..f0e85510 100644
--- a/package.yaml
+++ b/package.yaml
@@ -27,6 +27,7 @@ dependencies:
 - distributive >= 0.6 && < 1
 - lens >= 5.0 && < 6
 - MemoTrie >= 0.6 && < 1
+- QuickCheck >= 2.14 && < 3
 - mtl >= 2.2 && < 3
 - profunctors >= 5.6 && < 6
 - tagged >= 0.8 && < 1
diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index 0cf344f7..39720532 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -24,6 +24,7 @@ module Control.Lens.Grammar
   , regbnfG
   , regbnfGrammar
   , applicativeG
+  , languageG
     -- * Context-sensitive grammar
   , CtxGrammar
   , printG
@@ -45,6 +46,7 @@ import Control.Lens.PartialIso
 import Control.Lens.Grammar.BackusNaur
 import Control.Lens.Grammar.Boole
 import Control.Lens.Grammar.Kleene
+import Control.Lens.Grammar.Matching
 import Control.Lens.Grammar.Token
 import Control.Lens.Grammar.Symbol
 import Data.Bifunctor.Joker
@@ -67,6 +69,7 @@ import Witherable
 import Control.Lens.Grammar.BackusNaur as X
 import Control.Lens.Grammar.Boole as X
 import Control.Lens.Grammar.Kleene as X
+import Control.Lens.Grammar.Matching as X
 import Control.Lens.Grammar.Symbol as X
 import Control.Lens.Grammar.Token as X
 import Control.Lens.PartialIso as X
@@ -789,6 +792,9 @@ It can apply to a `RegGrammar`.
 regbnfG :: Grammar Char a -> RegBnf
 regbnfG bnf = runGrammor bnf
 
+languageG :: (Applicative f, TokenAlgebra token (f token)) => Grammar token a -> f [[token]]
+languageG bnf = languageGen (runGrammor bnf)
+
 {- | `printG` generates a printer from a `CtxGrammar`.
 Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s,
 the type system will allow `printG` to be applied to them.
diff --git a/src/Control/Lens/Grammar/BackusNaur.hs b/src/Control/Lens/Grammar/BackusNaur.hs
index c5868f31..ebfe058a 100644
--- a/src/Control/Lens/Grammar/BackusNaur.hs
+++ b/src/Control/Lens/Grammar/BackusNaur.hs
@@ -19,13 +19,10 @@ module Control.Lens.Grammar.BackusNaur
   , liftBnf0
   , liftBnf1
   , liftBnf2
-    -- * Matching
-  , Matching (..)
   , diffB
   ) where
 
 import Control.Lens
-import Control.Lens.Extras
 import Control.Lens.Grammar.Kleene
 import Control.Lens.Grammar.Token
 import Control.Lens.Grammar.Symbol
@@ -33,12 +30,6 @@ import Data.Bifunctor.Joker
 import Data.Coerce
 import Data.Foldable
 import Data.Function
-import qualified Data.IntMap.Strict as IntMap
-import Data.IntMap.Strict (IntMap)
-import qualified Data.IntSet as IntSet
-import Data.IntSet (IntSet)
-import qualified Data.Map.Strict as Map
-import Data.Map.Strict (Map)
 import Data.MemoTrie
 import qualified Data.Set as Set
 import Data.Set (Set)
@@ -113,11 +104,6 @@ liftBnf2
 liftBnf2 f (Bnf start0 rules0) (Bnf start1 rules1) =
   Bnf (f start0 start1) (Set.map coerce rules0 <> Set.map coerce rules1)
 
--- | Does a word match a pattern?
-class Matching word pattern | pattern -> word where
-  (=~) :: word -> pattern -> Bool
-  infix 2 =~
-
 {- |
 The [Brzozowski derivative]
 (https://dl.acm.org/doi/pdf/10.1145/321239.321249) of a
@@ -177,228 +163,6 @@ rulesNamed nameX = foldl' (flip inserter) Set.empty where
   inserter (nameY,y) =
     if nameX == nameY then Set.insert y else id
 
--- | A state in the Earley-extended Thompson transducer for a `Bnf`.
--- @EarleyTerminal cls ds@ matches on a token class and transitions to @ds@.
--- @EarleyNonTerminal name ds@ is a call point for rule @name@; after @name@
--- completes, control flows to @ds@. @EarleyEmit name@ is the final state
--- for rule @name@ and triggers completion during Earley closure.
-data EarleyState token
-  = EarleyTerminal (TokenClass token) IntSet
-  | EarleyNonTerminal String IntSet
-  | EarleyEmit String
-
-data EarleyTransducer token = EarleyTransducer
-  { earleyStates :: IntMap (EarleyState token)
-  , earleyRules :: Map String (IntSet, Bool)
-  , earleyAcceptId :: Int
-  , earleyStartStates :: IntSet
-  }
-
-compileEarley :: Bnf (RegEx token) -> EarleyTransducer token
-compileEarley (Bnf start rules) = EarleyTransducer
-  { earleyStates = IntMap.fromList allStates
-  , earleyRules = Map.fromList
-      [ ( n
-        , ( Map.findWithDefault IntSet.empty n firstsMap
-          , Map.findWithDefault False n nullMap
-          )
-        )
-      | n <- Map.keys ruleMap
-      ]
-  , earleyAcceptId = earleyAcceptId0
-  , earleyStartStates = startStates
-  }
-
-  where
-
-    ruleMap = foldr
-      (\(n, r) -> Map.insertWith (++) n [r]) Map.empty (toList rules)
-
-    rexNullable nm = \case
-      SeqEmpty -> True
-      NonTerminal n -> Map.findWithDefault False n nm
-      Sequence x y -> rexNullable nm x && rexNullable nm y
-      KleeneStar _ -> True
-      KleeneOpt _ -> True
-      KleenePlus x -> rexNullable nm x
-      RegExam (Alternate x y) -> rexNullable nm x || rexNullable nm y
-      RegExam (OneOf _) -> False
-      RegExam (NotOneOf _ _) -> False
-
-    iterNull nm =
-      let nm' = Map.mapWithKey
-            (\n _ -> any (rexNullable nm) (Map.findWithDefault [] n ruleMap)) nm
-      in if nm == nm' then nm else iterNull nm'
-
-    nullMap = iterNull (Map.map (const False) ruleMap)
-
-    ruleNames = Map.keys ruleMap
-
-    earleyAcceptId0 = 0
-
-    (finalMap, nextIdAfterFinals) =
-      foldl' alloc (Map.empty, earleyAcceptId0 + 1) ruleNames
-      where alloc (m, i) n = (Map.insert n i m, i + 1)
-
-    finalStatesList = [(finalMap Map.! n, EarleyEmit n) | n <- ruleNames]
-
-    (rulesStatesList, firstsMap, nextIdAfterRules) =
-      foldl' compileRule ([], Map.empty, nextIdAfterFinals) (Map.toList ruleMap)
-      where
-        compileRule (sts, fm, nid) (name, prods) =
-          let finalId = finalMap Map.! name
-              (newSts, newFirsts, nid') =
-                foldl' compileProd ([], IntSet.empty, nid) prods
-              compileProd (s, fs, i) prod =
-                let (f, st, i', _) =
-                      goEarley prod i (IntSet.singleton finalId)
-                in (s <> st, fs <> f, i')
-          in (sts <> newSts, Map.insert name newFirsts fm, nid')
-
-    (startFirsts, startStatesRaw, _, startBypass) =
-      goEarley start nextIdAfterRules (IntSet.singleton earleyAcceptId0)
-
-    startStates =
-      startFirsts <> bypassStates startBypass (IntSet.singleton earleyAcceptId0)
-
-    allStates = finalStatesList <> rulesStatesList <> startStatesRaw
-
-    bypassStates True = id
-    bypassStates False = const IntSet.empty
-
-    goEarley rex nextId dests = case rex of
-        SeqEmpty -> (IntSet.empty, [], nextId, True)
-        NonTerminal name ->
-          ( IntSet.singleton nextId
-          , [(nextId, EarleyNonTerminal name dests)]
-          , nextId + 1
-          , Map.findWithDefault False name nullMap
-          )
-        Sequence rex0 rex1 ->
-          let
-            (firsts1, states1, nextId1, bypass1) = goEarley rex1 nextId dests
-            (firsts0, states0, nextId0, bypass0) =
-              goEarley rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
-          in
-            ( firsts0 <> bypassStates bypass0 firsts1
-            , states0 <> states1
-            , nextId0
-            , bypass0 && bypass1
-            )
-        KleeneStar rex0 ->
-          let
-            (firsts, states, nextId', _) = goEarley rex0 nextId (firsts <> dests)
-          in
-            (firsts, states, nextId', True)
-        KleeneOpt rex0 ->
-          let
-            (firsts, states, nextId', _) = goEarley rex0 nextId dests
-          in
-            (firsts, states, nextId', True)
-        KleenePlus rex0 ->
-          let
-            (firsts, states, nextId', bypass) = goEarley rex0 nextId (firsts <> dests)
-          in
-            (firsts, states, nextId', bypass)
-        RegExam (OneOf chars)
-          | Set.null chars -> (IntSet.empty, [], nextId, False)
-          | otherwise ->
-              ( IntSet.singleton nextId
-              , [(nextId, EarleyTerminal (TokenClass (OneOf chars)) dests)]
-              , nextId + 1
-              , False
-              )
-        RegExam (NotOneOf chars catTest) ->
-          ( IntSet.singleton nextId
-          , [(nextId, EarleyTerminal (TokenClass (NotOneOf chars catTest)) dests)]
-          , nextId + 1
-          , False
-          )
-        RegExam (Alternate rex0 rex1) ->
-          let
-            (firsts1, states1, nextId1, bypass1) = goEarley rex1 nextId dests
-            (firsts0, states0, nextId0, bypass0) = goEarley rex0 nextId1 dests
-          in
-            ( firsts0 <> firsts1
-            , states0 <> states1
-            , nextId0
-            , bypass0 || bypass1
-            )
-
-matchEarley :: Categorized token => [token] -> EarleyTransducer token -> Bool
-matchEarley word et = IntSet.member 0 acceptOrigins
-  where
-
-    initialE0 = IntMap.fromList
-      [ (s, IntSet.singleton 0) | s <- IntSet.toList (earleyStartStates et) ]
-
-    sets0 = IntMap.singleton 0 initialE0
-
-    sets0closed = closureAt 0 sets0
-
-    (finalSets, n) = runInput 0 sets0closed word
-
-    runInput j ss [] = (ss, j)
-    runInput j ss (x : xs) =
-      let scanned = scanFrom j x ss
-          ss' = IntMap.insert (j + 1) scanned ss
-          closed = closureAt (j + 1) ss'
-      in runInput (j + 1) closed xs
-
-    en = IntMap.findWithDefault IntMap.empty n finalSets
-
-    acceptOrigins = IntMap.findWithDefault IntSet.empty (earleyAcceptId et) en
-
-    scanFrom j input ss = IntMap.foldrWithKey advance IntMap.empty e_j
-      where
-        e_j = IntMap.findWithDefault IntMap.empty j ss
-        advance s origs acc = case IntMap.lookup s (earleyStates et) of
-          Just (EarleyTerminal cls ds) | tokenClass cls input ->
-            IntSet.foldr
-              (\d -> IntMap.insertWith IntSet.union d origs) acc ds
-          _ -> acc
-
-    closureAt j initialSets = loop initialWork initialSets
-      where
-        initialE = IntMap.findWithDefault IntMap.empty j initialSets
-        initialWork =
-          [ (s, i) | (s, os) <- IntMap.toList initialE, i <- IntSet.toList os ]
-        loop [] ss = ss
-        loop ((s, i) : rest) ss = case IntMap.lookup s (earleyStates et) of
-          Just (EarleyNonTerminal name ds) ->
-            let (firsts, isNull) = Map.findWithDefault
-                  (IntSet.empty, False) name (earleyRules et)
-                predItems = [(f, j) | f <- IntSet.toList firsts]
-                nullItems =
-                  if isNull then [(d, i) | d <- IntSet.toList ds] else []
-                (ss', new) = addEarleyItems j (predItems <> nullItems) ss
-            in loop (new <> rest) ss'
-          Just (EarleyEmit name) ->
-            let e_i = IntMap.findWithDefault IntMap.empty i ss
-                completions =
-                  [ (d, i')
-                  | (t, os) <- IntMap.toList e_i
-                  , Just (EarleyNonTerminal n' ds) <- [IntMap.lookup t (earleyStates et)]
-                  , n' == name
-                  , i' <- IntSet.toList os
-                  , d <- IntSet.toList ds
-                  ]
-                (ss', new) = addEarleyItems j completions ss
-            in loop (new <> rest) ss'
-          _ -> loop rest ss
-
-    addEarleyItems j items ss = foldl' ins (ss, []) items
-      where
-        ins (acc, new) (state, origin) =
-          let e_j = IntMap.findWithDefault IntMap.empty j acc
-              os = IntMap.findWithDefault IntSet.empty state e_j
-          in if IntSet.member origin os
-            then (acc, new)
-            else
-              let e_j' = IntMap.insert state (IntSet.insert origin os) e_j
-                  acc' = IntMap.insert j e_j' acc
-              in (acc', (state, origin) : new)
-
 -- instances
 instance (Ord rule, NonTerminalSymbol rule)
   => BackusNaurForm (Bnf rule) where
@@ -438,11 +202,3 @@ instance (Ord rule, Monoid rule) => Monoid (Bnf rule) where
   mempty = liftBnf0 mempty
 instance (Ord rule, Semigroup rule) => Semigroup (Bnf rule) where
   (<>) = liftBnf2 (<>)
-instance Categorized token
-  => Matching [token] (Bnf (RegEx token)) where
-    word =~ bnf = matchEarley word (compileEarley bnf)
-instance Categorized token
-  => Matching [token] (RegEx token) where
-    word =~ pattern = word =~ liftBnf0 pattern
-instance Matching s (APrism s t a b) where
-  word =~ pattern = is pattern word
diff --git a/src/Control/Lens/Grammar/Kleene.hs b/src/Control/Lens/Grammar/Kleene.hs
index 52d74473..5ffce0b1 100644
--- a/src/Control/Lens/Grammar/Kleene.hs
+++ b/src/Control/Lens/Grammar/Kleene.hs
@@ -38,6 +38,9 @@ import Data.Profunctor.Distributor
 import Data.Set (Set)
 import qualified Data.Set as Set
 import GHC.Generics
+import Test.QuickCheck.Arbitrary
+import Test.QuickCheck.Gen (Gen)
+import qualified Test.QuickCheck.Gen as Gen
 import Text.ParserCombinators.ReadP (ReadP)
 import qualified Text.ParserCombinators.ReadP as ReadP
 
@@ -225,6 +228,14 @@ instance TokenAlgebra token (f token)
     tokenClass = Joker . tokenClass
 instance TokenAlgebra Char (ReadP Char) where
   tokenClass = ReadP.satisfy . tokenClass
+instance (Categorized token, Arbitrary token) => TokenAlgebra token (Gen token) where
+  tokenClass (TokenClass exam) = case exam of
+    OneOf xs -> Gen.elements (toList xs)
+    NotOneOf xs (AndAsIn cat) -> arbitrary `Gen.suchThat`
+      (\x -> x `notElem` xs && categorize x == cat)
+    NotOneOf xs (AndNotAsIn cats) -> arbitrary `Gen.suchThat`
+      (\x -> x `notElem` xs && categorize x `notElem` cats)
+    Alternate cls1 cls2 -> Gen.oneof [tokenClass cls1, tokenClass cls2]
 instance Categorized token => Monoid (RegEx token) where
   mempty = SeqEmpty
 instance Categorized token => Semigroup (RegEx token) where
diff --git a/src/Control/Lens/Grammar/Matching.hs b/src/Control/Lens/Grammar/Matching.hs
new file mode 100644
index 00000000..0bb6de50
--- /dev/null
+++ b/src/Control/Lens/Grammar/Matching.hs
@@ -0,0 +1,353 @@
+{- |
+Module      : Control.Lens.Grammar.Matching
+Description : pattern matching & language generation
+Copyright   : (C) 2026 - Eitan Chatav
+License     : BSD-style (see the file LICENSE)
+Maintainer  : Eitan Chatav <eitan.chatav@gmail.com>
+Stability   : provisional
+Portability : non-portable
+
+https://www.cs.dartmouth.edu/~doug/nfa.pdf
+http://trevorjim.com/papers/ldta-2009.pdf
+-}
+
+module Control.Lens.Grammar.Matching
+  ( Matching (..)
+  , languageGen
+  ) where
+
+import Control.Lens
+import Control.Lens.Extras
+import Control.Lens.Grammar.BackusNaur
+import Control.Lens.Grammar.Kleene
+import Control.Lens.Grammar.Token
+import Data.Foldable
+import qualified Data.IntMap.Strict as IntMap
+import Data.IntMap.Strict (IntMap)
+import qualified Data.IntSet as IntSet
+import Data.IntSet (IntSet)
+import qualified Data.Map.Strict as Map
+import Data.Map.Strict (Map)
+import qualified Data.Set as Set
+
+-- | Does a word match a pattern?
+class Matching word pattern | pattern -> word where
+  (=~) :: word -> pattern -> Bool
+  infix 2 =~
+
+-- | A state in the Earley-extended Thompson transducer for a `Bnf`.
+-- @EarleyTerminal cls ds@ matches on a token class and transitions to @ds@.
+-- @EarleyNonTerminal name ds@ is a call point for rule @name@; after @name@
+-- completes, control flows to @ds@. @EarleyEmit name@ is the final state
+-- for rule @name@ and triggers completion during Earley closure.
+data EarleyState token
+  = EarleyTerminal (TokenClass token) IntSet
+  | EarleyNonTerminal String IntSet
+  | EarleyEmit String
+
+data EarleyTransducer token = EarleyTransducer
+  { earleyStates :: IntMap (EarleyState token)
+  , earleyRules :: Map String (IntSet, Bool)
+  , earleyAcceptId :: Int
+  , earleyStartStates :: IntSet
+  }
+
+compileEarley :: Bnf (RegEx token) -> EarleyTransducer token
+compileEarley (Bnf start rules) = EarleyTransducer
+  { earleyStates = IntMap.fromList allStates
+  , earleyRules = Map.fromList
+      [ ( n
+        , ( Map.findWithDefault IntSet.empty n firstsMap
+          , Map.findWithDefault False n nullMap
+          )
+        )
+      | n <- Map.keys ruleMap
+      ]
+  , earleyAcceptId = earleyAcceptId0
+  , earleyStartStates = startStates
+  }
+
+  where
+
+    ruleMap = foldr
+      (\(n, r) -> Map.insertWith (++) n [r]) Map.empty (toList rules)
+
+    rexNullable nm = \case
+      SeqEmpty -> True
+      NonTerminal n -> Map.findWithDefault False n nm
+      Sequence x y -> rexNullable nm x && rexNullable nm y
+      KleeneStar _ -> True
+      KleeneOpt _ -> True
+      KleenePlus x -> rexNullable nm x
+      RegExam (Alternate x y) -> rexNullable nm x || rexNullable nm y
+      RegExam (OneOf _) -> False
+      RegExam (NotOneOf _ _) -> False
+
+    iterNull nm =
+      let nm' = Map.mapWithKey
+            (\n _ -> any (rexNullable nm) (Map.findWithDefault [] n ruleMap)) nm
+      in if nm == nm' then nm else iterNull nm'
+
+    nullMap = iterNull (Map.map (const False) ruleMap)
+
+    ruleNames = Map.keys ruleMap
+
+    earleyAcceptId0 = 0
+
+    (finalMap, nextIdAfterFinals) =
+      foldl' alloc (Map.empty, earleyAcceptId0 + 1) ruleNames
+      where alloc (m, i) n = (Map.insert n i m, i + 1)
+
+    finalStatesList = [(finalMap Map.! n, EarleyEmit n) | n <- ruleNames]
+
+    (rulesStatesList, firstsMap, nextIdAfterRules) =
+      foldl' compileRule ([], Map.empty, nextIdAfterFinals) (Map.toList ruleMap)
+      where
+        compileRule (sts, fm, nid) (name, prods) =
+          let finalId = finalMap Map.! name
+              (newSts, newFirsts, nid') =
+                foldl' compileProd ([], IntSet.empty, nid) prods
+              compileProd (s, fs, i) prod =
+                let (f, st, i', _) =
+                      goEarley prod i (IntSet.singleton finalId)
+                in (s <> st, fs <> f, i')
+          in (sts <> newSts, Map.insert name newFirsts fm, nid')
+
+    (startFirsts, startStatesRaw, _, startBypass) =
+      goEarley start nextIdAfterRules (IntSet.singleton earleyAcceptId0)
+
+    startStates =
+      startFirsts <> bypassStates startBypass (IntSet.singleton earleyAcceptId0)
+
+    allStates = finalStatesList <> rulesStatesList <> startStatesRaw
+
+    bypassStates True = id
+    bypassStates False = const IntSet.empty
+
+    goEarley rex nextId dests = case rex of
+        SeqEmpty -> (IntSet.empty, [], nextId, True)
+        NonTerminal name ->
+          ( IntSet.singleton nextId
+          , [(nextId, EarleyNonTerminal name dests)]
+          , nextId + 1
+          , Map.findWithDefault False name nullMap
+          )
+        Sequence rex0 rex1 ->
+          let
+            (firsts1, states1, nextId1, bypass1) = goEarley rex1 nextId dests
+            (firsts0, states0, nextId0, bypass0) =
+              goEarley rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
+          in
+            ( firsts0 <> bypassStates bypass0 firsts1
+            , states0 <> states1
+            , nextId0
+            , bypass0 && bypass1
+            )
+        KleeneStar rex0 ->
+          let
+            (firsts, states, nextId', _) = goEarley rex0 nextId (firsts <> dests)
+          in
+            (firsts, states, nextId', True)
+        KleeneOpt rex0 ->
+          let
+            (firsts, states, nextId', _) = goEarley rex0 nextId dests
+          in
+            (firsts, states, nextId', True)
+        KleenePlus rex0 ->
+          let
+            (firsts, states, nextId', bypass) = goEarley rex0 nextId (firsts <> dests)
+          in
+            (firsts, states, nextId', bypass)
+        RegExam (OneOf chars)
+          | Set.null chars -> (IntSet.empty, [], nextId, False)
+          | otherwise ->
+              ( IntSet.singleton nextId
+              , [(nextId, EarleyTerminal (TokenClass (OneOf chars)) dests)]
+              , nextId + 1
+              , False
+              )
+        RegExam (NotOneOf chars catTest) ->
+          ( IntSet.singleton nextId
+          , [(nextId, EarleyTerminal (TokenClass (NotOneOf chars catTest)) dests)]
+          , nextId + 1
+          , False
+          )
+        RegExam (Alternate rex0 rex1) ->
+          let
+            (firsts1, states1, nextId1, bypass1) = goEarley rex1 nextId dests
+            (firsts0, states0, nextId0, bypass0) = goEarley rex0 nextId1 dests
+          in
+            ( firsts0 <> firsts1
+            , states0 <> states1
+            , nextId0
+            , bypass0 || bypass1
+            )
+
+matchEarley :: Categorized token => [token] -> EarleyTransducer token -> Bool
+matchEarley word et = acceptsChart n finalSets et
+  where
+
+    initialE0 = IntMap.fromList
+      [ (s, IntSet.singleton 0) | s <- IntSet.toList (earleyStartStates et) ]
+
+    sets0 = closeChartAt 0 (IntMap.singleton 0 initialE0) et
+
+    (finalSets, n) = runInput 0 sets0 word
+
+    runInput j ss [] = (ss, j)
+    runInput j ss (x : xs) =
+      let scanned = scanFrom j x ss
+          closed = closeChartAt (j + 1) (IntMap.insert (j + 1) scanned ss) et
+      in runInput (j + 1) closed xs
+
+    scanFrom j input ss = IntMap.foldrWithKey advance IntMap.empty e_j
+      where
+        e_j = IntMap.findWithDefault IntMap.empty j ss
+        advance s origs acc = case IntMap.lookup s (earleyStates et) of
+          Just (EarleyTerminal cls ds) | tokenClass cls input ->
+            IntSet.foldr
+              (\d -> IntMap.insertWith IntSet.union d origs) acc ds
+          _ -> acc
+-- instances
+instance Categorized token
+  => Matching [token] (Bnf (RegEx token)) where
+    word =~ bnf = matchEarley word (compileEarley bnf)
+instance Categorized token
+  => Matching [token] (RegEx token) where
+    word =~ pattern = word =~ liftBnf0 pattern
+instance Matching s (APrism s t a b) where
+  word =~ pattern = is pattern word
+
+{- |
+Generate words recognized by a `Bnf` using Earley chart progression.
+
+Chart/state progression is deterministic (state id order). Token realization is
+random but always valid for the selected terminal class.
+-}
+languageGen
+  :: (Applicative f, TokenAlgebra token (f token))
+  => Bnf (RegEx token)
+  -> f [[token]]
+languageGen bnf = sequenceA (fmap sampleWord classWords)
+  where
+    et = compileEarley bnf
+
+    classWords = enumerateByLength [(0, [], initialChart et)] Set.empty
+
+    sampleWord = traverse tokenClass . reverse
+
+    enumerateByLength [] _ = []
+    enumerateByLength frontier seen =
+      let
+        (accepted, seen') = acceptedAtFrontier frontier seen
+        next = concatMap expand frontier
+      in accepted <> enumerateByLength next seen'
+
+    acceptedAtFrontier frontier seen0 =
+      let (acceptedRev, seen') = foldl' step ([], seen0) frontier
+      in (reverse acceptedRev, seen')
+      where
+        step (acc, seen) (j, revWord, chart)
+          | acceptsChart j chart et =
+              if Set.member revWord seen
+                then (acc, seen)
+                else (revWord : acc, Set.insert revWord seen)
+          | otherwise = (acc, seen)
+
+    expand (j, revWord, chart) =
+      [ (j + 1, cls : revWord, nextChart)
+      | (cls, nextChart) <- scanClassOptions j chart et
+      ]
+
+initialChart
+  :: EarleyTransducer token
+  -> IntMap (IntMap IntSet)
+initialChart et = closeChartAt 0 (IntMap.singleton 0 initialE0) et
+  where
+    initialE0 = IntMap.fromList
+      [ (s, IntSet.singleton 0) | s <- IntSet.toList (earleyStartStates et) ]
+
+acceptsChart
+  :: Int
+  -> IntMap (IntMap IntSet)
+  -> EarleyTransducer token
+  -> Bool
+acceptsChart j chart et = IntSet.member 0 acceptOrigins
+  where
+    e_j = IntMap.findWithDefault IntMap.empty j chart
+    acceptOrigins = IntMap.findWithDefault IntSet.empty (earleyAcceptId et) e_j
+
+scanClassOptions
+  :: Categorized token
+  => Int
+  -> IntMap (IntMap IntSet)
+  -> EarleyTransducer token
+  -> [(TokenClass token, IntMap (IntMap IntSet))]
+scanClassOptions j chart et =
+  [ (cls, closeChartAt (j + 1) (IntMap.insert (j + 1) scanned chart) et)
+  | (cls, scanned) <- Map.toAscList grouped
+  ]
+  where
+    grouped = IntMap.foldrWithKey advance Map.empty e_j
+    e_j = IntMap.findWithDefault IntMap.empty j chart
+
+    advance s origs acc = case IntMap.lookup s (earleyStates et) of
+      Just (EarleyTerminal cls ds) ->
+        Map.insertWith mergeEarleySet cls scanned acc
+        where
+          scanned = IntSet.foldr
+            (\d -> IntMap.insertWith IntSet.union d origs) IntMap.empty ds
+      _ -> acc
+
+    mergeEarleySet = IntMap.unionWith IntSet.union
+
+closeChartAt
+  :: Int
+  -> IntMap (IntMap IntSet)
+  -> EarleyTransducer token
+  -> IntMap (IntMap IntSet)
+closeChartAt j initialChart0 et = loop initialWork initialChart0
+  where
+    initialE = IntMap.findWithDefault IntMap.empty j initialChart0
+    initialWork =
+      [ (s, i) | (s, os) <- IntMap.toList initialE, i <- IntSet.toList os ]
+
+    loop [] chart = chart
+    loop ((s, i) : rest) chart = case IntMap.lookup s (earleyStates et) of
+      Just (EarleyNonTerminal name ds) ->
+        let
+          (firsts, isNull) = Map.findWithDefault
+            (IntSet.empty, False) name (earleyRules et)
+          predItems = [(f, j) | f <- IntSet.toList firsts]
+          nullItems =
+            if isNull then [(d, i) | d <- IntSet.toList ds] else []
+          (chart', new) = addEarleyItems (predItems <> nullItems) chart
+        in loop (new <> rest) chart'
+      Just (EarleyEmit name) ->
+        let
+          e_i = IntMap.findWithDefault IntMap.empty i chart
+          completions =
+            [ (d, i')
+            | (t, os) <- IntMap.toList e_i
+            , Just (EarleyNonTerminal n' ds) <- [IntMap.lookup t (earleyStates et)]
+            , n' == name
+            , i' <- IntSet.toList os
+            , d <- IntSet.toList ds
+            ]
+          (chart', new) = addEarleyItems completions chart
+        in loop (new <> rest) chart'
+      _ -> loop rest chart
+
+    addEarleyItems items chart = foldl' ins (chart, []) items
+      where
+        ins (acc, new) (state, origin) =
+          let
+            e_j = IntMap.findWithDefault IntMap.empty j acc
+            os = IntMap.findWithDefault IntSet.empty state e_j
+          in if IntSet.member origin os
+            then (acc, new)
+            else
+              let
+                e_j' = IntMap.insert state (IntSet.insert origin os) e_j
+                acc' = IntMap.insert j e_j' acc
+              in (acc', (state, origin) : new)
diff --git a/src/Control/Lens/Grammar/Token.hs b/src/Control/Lens/Grammar/Token.hs
index 884d5963..449bfb76 100644
--- a/src/Control/Lens/Grammar/Token.hs
+++ b/src/Control/Lens/Grammar/Token.hs
@@ -22,9 +22,13 @@ import Control.Lens
 import Control.Lens.PartialIso
 import Data.Bifunctor.Joker
 import Data.Char
+import Data.Foldable
 import Data.Profunctor
 import Data.Profunctor.Monoidal
 import Data.Word
+import Test.QuickCheck.Arbitrary
+import Test.QuickCheck.Gen (Gen)
+import qualified Test.QuickCheck.Gen as Gen
 import Text.ParserCombinators.ReadP (ReadP)
 import qualified Text.ParserCombinators.ReadP as ReadP
 
@@ -137,3 +141,10 @@ instance Tokenized Char (ReadP Char) where
   notOneOf = ReadP.satisfy . notOneOf
   asIn = ReadP.satisfy . asIn
   notAsIn = ReadP.satisfy . notAsIn
+instance (Categorized token, Arbitrary token) => Tokenized token (Gen token) where
+  anyToken = arbitrary @token
+  token = pure
+  oneOf = Gen.elements . toList
+  notOneOf xs = arbitrary `Gen.suchThat` (`notElem` xs)
+  asIn cat = arbitrary `Gen.suchThat` ((==) cat . categorize)
+  notAsIn cat = arbitrary `Gen.suchThat` ((/=) cat . categorize)
diff --git a/src/Data/Profunctor/Grammar.hs b/src/Data/Profunctor/Grammar.hs
index b721355d..1eb2f03c 100644
--- a/src/Data/Profunctor/Grammar.hs
+++ b/src/Data/Profunctor/Grammar.hs
@@ -27,6 +27,7 @@ import Control.Lens
 import Control.Lens.Extras
 import Control.Lens.Grammar.BackusNaur
 import Control.Lens.Grammar.Kleene
+import Control.Lens.Grammar.Matching
 import Control.Lens.Grammar.Symbol
 import Control.Lens.Grammar.Token
 import Control.Monad
diff --git a/test/Main.hs b/test/Main.hs
index ae792432..610e5363 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -11,6 +11,7 @@ import Data.Profunctor.Types (Star (..))
 import System.Environment (lookupEnv)
 import Test.DocTest
 import Test.Hspec
+import Test.QuickCheck (generate)
 import qualified Text.Megaparsec as M
 
 import Examples.Arithmetic
@@ -30,15 +31,15 @@ main = do
     when shouldRunDoctests $
       describe "doctest" $
         it "should run haddock examples" doctests
-    describe "regexGrammar" $ for_ regexExamples $ testGrammar False regexGrammar
-    describe "semverGrammar" $ for_ semverExamples $ testGrammar True semverGrammar
-    describe "semverCtxGrammar" $ for_ semverExamples $ testCtxGrammar True semverCtxGrammar
-    describe "arithGrammar" $ for_ arithExamples $ testGrammar True arithGrammar
-    describe "jsonGrammar" $ for_ jsonExamples $ testGrammar False jsonGrammar
-    describe "sexprGrammar" $ for_ sexprExamples $ testGrammar True sexprGrammar
-    describe "lambdaGrammar" $ for_ lambdaExamples $ testGrammar True lambdaGrammar
-    describe "lenvecGrammar" $ for_ lenvecExamples $ testCtxGrammar True lenvecGrammar
-    describe "chainGrammar" $ for_ chainExamples $ testGrammar True chainGrammar
+    describe "regexGrammar" $ testCfg False regexExamples regexGrammar
+    describe "semverGrammar" $ testCfg True semverExamples semverGrammar
+    describe "semverCtxGrammar" $ testCsg True semverExamples semverCtxGrammar
+    describe "arithGrammar" $ testCfg True arithExamples arithGrammar
+    describe "jsonGrammar" $ testCfg False jsonExamples jsonGrammar
+    describe "sexprGrammar" $ testCfg True sexprExamples sexprGrammar
+    describe "lambdaGrammar" $ testCfg True lambdaExamples lambdaGrammar
+    describe "lenvecGrammar" $ testCsg True lenvecExamples lenvecGrammar
+    describe "chainGrammar" $ testCfg True chainExamples chainGrammar
     describe "Parsector try rollback" tryRollbackTests
     describe "Kleene" kleeneProperties
     describe "meander" meanderProperties
@@ -117,12 +118,23 @@ meanderProperties =
     seen `shouldBe` input
     units `shouldBe` replicate (length input) ()
 
-testGrammar :: (Show a, Eq a) => Bool -> Grammar Char a -> (a, String) -> Spec
-testGrammar isLL1 grammar (expectedSyntax, expectedString) = do
-  testCtxGrammar isLL1 grammar (expectedSyntax, expectedString)
-  it ("should match " <> expectedString <> " correctly") $ do
-    let actualMatch = expectedString =~ regbnfG grammar
-    actualMatch `shouldBe` True
+testCfg :: (Show a, Eq a) => Bool -> [(a, String)] -> Grammar Char a -> Spec
+testCfg isLL1 examples grammar = do
+  describe "examples" $ for_ examples $ \(expectedSyntax, expectedString) -> do
+    testCtxGrammar isLL1 grammar (expectedSyntax, expectedString)
+    it ("should match " <> expectedString <> " correctly") $ do
+      let actualMatch = expectedString =~ regbnfG grammar
+      actualMatch `shouldBe` True
+  describe "generated languageG" $ do
+    it "should parses with exactly one full parse" $ do
+      generated <- generate (take 100 <$> languageG grammar)
+      for_ generated $ \word -> do
+        let fullParses = [() | (_, "") <- parseG grammar word]
+        fullParses `shouldBe` [()]
+
+testCsg :: (Show a, Eq a) => Bool -> [(a, String)] -> CtxGrammar Char a -> Spec
+testCsg isLL1 examples grammar =
+  describe "examples" $ for_ examples $ testCtxGrammar isLL1 grammar
 
 testCtxGrammar :: (Show a, Eq a) => Bool -> CtxGrammar Char a -> (a, String) -> Spec
 testCtxGrammar isLL1 grammar (expectedSyntax, expectedString) = do

From 135aec1989b99ef19b9d937b3eeb4764a3fc6384 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 16:36:34 -0700
Subject: [PATCH 09/31] random sample token classes

---
 distributors.cabal                 |  6 ++++++
 package.yaml                       |  3 +++
 src/Control/Lens/Grammar/Kleene.hs | 14 ++++++++++++++
 src/Control/Lens/Grammar/Token.hs  | 14 ++++++++++++++
 4 files changed, 37 insertions(+)

diff --git a/distributors.cabal b/distributors.cabal
index ed7a02ce..45b73833 100644
--- a/distributors.cabal
+++ b/distributors.cabal
@@ -108,12 +108,15 @@ library
     , contravariant >=1.5 && <2
     , distributive >=0.6 && <1
     , lens >=5.0 && <6
+    , monad-loops >=0.4.3 && <1
     , mtl >=2.2 && <3
     , profunctors >=5.6 && <6
+    , random >=1.2 && <2
     , tagged >=0.8 && <1
     , template-haskell >=2.17 && <3
     , text ==2.*
     , th-abstraction >=0.4 && <1
+    , transformers >=0.5 && <1
     , vector >=0.12 && <1
     , witherable >=0.4 && <1
   default-language: Haskell2010
@@ -190,12 +193,15 @@ test-suite test
     , hspec >=2.7 && <3
     , lens >=5.0 && <6
     , megaparsec >=9.0 && <10
+    , monad-loops >=0.4.3 && <1
     , mtl >=2.2 && <3
     , profunctors >=5.6 && <6
+    , random >=1.2 && <2
     , tagged >=0.8 && <1
     , template-haskell >=2.17 && <3
     , text ==2.*
     , th-abstraction >=0.4 && <1
+    , transformers >=0.5 && <1
     , vector >=0.12 && <1
     , witherable >=0.4 && <1
   default-language: Haskell2010
diff --git a/package.yaml b/package.yaml
index f0e85510..1e652244 100644
--- a/package.yaml
+++ b/package.yaml
@@ -28,7 +28,10 @@ dependencies:
 - lens >= 5.0 && < 6
 - MemoTrie >= 0.6 && < 1
 - QuickCheck >= 2.14 && < 3
+- monad-loops >=0.4.3 && < 1
 - mtl >= 2.2 && < 3
+- random >= 1.2 && < 2
+- transformers >= 0.5 && < 1
 - profunctors >= 5.6 && < 6
 - tagged >= 0.8 && < 1
 - template-haskell >= 2.17 && < 3
diff --git a/src/Control/Lens/Grammar/Kleene.hs b/src/Control/Lens/Grammar/Kleene.hs
index 5ffce0b1..03e8d739 100644
--- a/src/Control/Lens/Grammar/Kleene.hs
+++ b/src/Control/Lens/Grammar/Kleene.hs
@@ -29,6 +29,8 @@ import Control.Applicative
 import Control.Lens.Grammar.Boole
 import Control.Lens.Grammar.Symbol
 import Control.Lens.Grammar.Token
+import Control.Monad.Loops
+import Control.Monad.Trans.State.Strict (StateT, state)
 import Data.Bifunctor.Joker
 import Data.Foldable
 import Data.MemoTrie
@@ -38,6 +40,7 @@ import Data.Profunctor.Distributor
 import Data.Set (Set)
 import qualified Data.Set as Set
 import GHC.Generics
+import System.Random (RandomGen, Random, random)
 import Test.QuickCheck.Arbitrary
 import Test.QuickCheck.Gen (Gen)
 import qualified Test.QuickCheck.Gen as Gen
@@ -236,6 +239,17 @@ instance (Categorized token, Arbitrary token) => TokenAlgebra token (Gen token)
     NotOneOf xs (AndNotAsIn cats) -> arbitrary `Gen.suchThat`
       (\x -> x `notElem` xs && categorize x `notElem` cats)
     Alternate cls1 cls2 -> Gen.oneof [tokenClass cls1, tokenClass cls2]
+instance (RandomGen g, Monad m, Categorized token, Random token)
+  => TokenAlgebra token (StateT g m token) where
+  tokenClass (TokenClass exam) = case exam of
+    OneOf xs -> oneOf xs
+    NotOneOf xs (AndAsIn cat) ->
+      iterateUntil (\x -> x `notElem` xs && categorize x == cat) anyToken
+    NotOneOf xs (AndNotAsIn cats) ->
+      iterateUntil (\x -> x `notElem` xs && categorize x `notElem` cats) anyToken
+    Alternate cls1 cls2 -> do
+      b <- state random
+      if (b :: Bool) then tokenClass cls1 else tokenClass cls2
 instance Categorized token => Monoid (RegEx token) where
   mempty = SeqEmpty
 instance Categorized token => Semigroup (RegEx token) where
diff --git a/src/Control/Lens/Grammar/Token.hs b/src/Control/Lens/Grammar/Token.hs
index 449bfb76..1bede35c 100644
--- a/src/Control/Lens/Grammar/Token.hs
+++ b/src/Control/Lens/Grammar/Token.hs
@@ -20,12 +20,15 @@ module Control.Lens.Grammar.Token
 
 import Control.Lens
 import Control.Lens.PartialIso
+import Control.Monad.Loops (iterateUntil)
 import Data.Bifunctor.Joker
 import Data.Char
 import Data.Foldable
 import Data.Profunctor
 import Data.Profunctor.Monoidal
 import Data.Word
+import Control.Monad.Trans.State.Strict (StateT, state)
+import System.Random (RandomGen, Random, random, randomR)
 import Test.QuickCheck.Arbitrary
 import Test.QuickCheck.Gen (Gen)
 import qualified Test.QuickCheck.Gen as Gen
@@ -148,3 +151,14 @@ instance (Categorized token, Arbitrary token) => Tokenized token (Gen token) whe
   notOneOf xs = arbitrary `Gen.suchThat` (`notElem` xs)
   asIn cat = arbitrary `Gen.suchThat` ((==) cat . categorize)
   notAsIn cat = arbitrary `Gen.suchThat` ((/=) cat . categorize)
+instance (RandomGen g, Monad m, Categorized token, Random token)
+  => Tokenized token (StateT g m token) where
+  anyToken = state random
+  token = pure
+  oneOf xs = do
+    let ys = toList xs
+    i <- state (randomR (0, length ys - 1))
+    pure (ys !! i)
+  notOneOf xs = iterateUntil (`notElem` xs) anyToken
+  asIn cat = iterateUntil ((== cat) . categorize) anyToken
+  notAsIn cat = iterateUntil ((/= cat) . categorize) anyToken

From 6b91bcb4b37b14275c0789f71f145db4ce9ef8d2 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 16:56:42 -0700
Subject: [PATCH 10/31] Update Matching.hs

---
 src/Control/Lens/Grammar/Matching.hs | 68 +++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 12 deletions(-)

diff --git a/src/Control/Lens/Grammar/Matching.hs b/src/Control/Lens/Grammar/Matching.hs
index 0bb6de50..056fe689 100644
--- a/src/Control/Lens/Grammar/Matching.hs
+++ b/src/Control/Lens/Grammar/Matching.hs
@@ -14,6 +14,8 @@ http://trevorjim.com/papers/ldta-2009.pdf
 module Control.Lens.Grammar.Matching
   ( Matching (..)
   , languageGen
+  , expected
+  , unreachableRules
   ) where
 
 import Control.Lens
@@ -29,6 +31,7 @@ import Data.IntSet (IntSet)
 import qualified Data.Map.Strict as Map
 import Data.Map.Strict (Map)
 import qualified Data.Set as Set
+import Data.Set (Set)
 
 -- | Does a word match a pattern?
 class Matching word pattern | pattern -> word where
@@ -184,21 +187,21 @@ compileEarley (Bnf start rules) = EarleyTransducer
             )
 
 matchEarley :: Categorized token => [token] -> EarleyTransducer token -> Bool
-matchEarley word et = acceptsChart n finalSets et
-  where
-
-    initialE0 = IntMap.fromList
-      [ (s, IntSet.singleton 0) | s <- IntSet.toList (earleyStartStates et) ]
-
-    sets0 = closeChartAt 0 (IntMap.singleton 0 initialE0) et
-
-    (finalSets, n) = runInput 0 sets0 word
+matchEarley word et = acceptsChart n chart et
+  where (n, chart) = runEarleyPrefix word et
 
-    runInput j ss [] = (ss, j)
-    runInput j ss (x : xs) =
+runEarleyPrefix
+  :: Categorized token
+  => [token]
+  -> EarleyTransducer token
+  -> (Int, IntMap (IntMap IntSet))
+runEarleyPrefix word et = go 0 (initialChart et) word
+  where
+    go j ss [] = (j, ss)
+    go j ss (x : xs) =
       let scanned = scanFrom j x ss
           closed = closeChartAt (j + 1) (IntMap.insert (j + 1) scanned ss) et
-      in runInput (j + 1) closed xs
+      in go (j + 1) closed xs
 
     scanFrom j input ss = IntMap.foldrWithKey advance IntMap.empty e_j
       where
@@ -208,6 +211,47 @@ matchEarley word et = acceptsChart n finalSets et
             IntSet.foldr
               (\d -> IntMap.insertWith IntSet.union d origs) acc ds
           _ -> acc
+
+{- |
+Token classes that could legally appear next after the given input prefix,
+according to the grammar. An empty result means the prefix is a dead end —
+no extension can ever be accepted. Useful for autocomplete and for
+\"expected one of …\" parse errors.
+-}
+expected
+  :: Categorized token
+  => [token] -> Bnf (RegEx token) -> [TokenClass token]
+expected word bnf = map fst (scanClassOptions n chart et)
+  where
+    et = compileEarley bnf
+    (n, chart) = runEarleyPrefix word et
+
+{- |
+Rule names declared in the `Bnf` that can never be entered from the start
+expression — dead productions. A non-empty result is a grammar-hygiene
+warning: those rules can be deleted without changing the recognized language.
+-}
+unreachableRules :: Bnf (RegEx token) -> Set String
+unreachableRules bnf =
+  Map.keysSet (earleyRules et) `Set.difference` called
+  where
+    et = compileEarley bnf
+    called = bfs (earleyStartStates et) IntSet.empty Set.empty
+
+    bfs frontier seen calls
+      | IntSet.null fresh = calls
+      | otherwise = bfs next (seen <> fresh) calls'
+      where
+        fresh = IntSet.difference frontier seen
+        (next, calls') = IntSet.foldr step (IntSet.empty, calls) fresh
+
+    step s (acc, cs) = case IntMap.lookup s (earleyStates et) of
+      Just (EarleyTerminal _ ds) -> (acc <> ds, cs)
+      Just (EarleyNonTerminal name ds) ->
+        let firsts = maybe IntSet.empty fst (Map.lookup name (earleyRules et))
+        in (acc <> ds <> firsts, Set.insert name cs)
+      Just (EarleyEmit _) -> (acc, cs)
+      Nothing -> (acc, cs)
 -- instances
 instance Categorized token
   => Matching [token] (Bnf (RegEx token)) where

From 4dfbb3a3b6926af5c5897bd7c3ecbbe7d437eb64 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 21:54:56 -0700
Subject: [PATCH 11/31] cleaning

---
 src/Control/Lens/Grammar.hs          |   2 +-
 src/Control/Lens/Grammar/Matching.hs | 143 +++++++++++++--------------
 2 files changed, 71 insertions(+), 74 deletions(-)

diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index 39720532..fa08b090 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -793,7 +793,7 @@ regbnfG :: Grammar Char a -> RegBnf
 regbnfG bnf = runGrammor bnf
 
 languageG :: (Applicative f, TokenAlgebra token (f token)) => Grammar token a -> f [[token]]
-languageG bnf = languageGen (runGrammor bnf)
+languageG bnf = languageGen (compileTransducer (runGrammor bnf))
 
 {- | `printG` generates a printer from a `CtxGrammar`.
 Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s,
diff --git a/src/Control/Lens/Grammar/Matching.hs b/src/Control/Lens/Grammar/Matching.hs
index 056fe689..2aae09a6 100644
--- a/src/Control/Lens/Grammar/Matching.hs
+++ b/src/Control/Lens/Grammar/Matching.hs
@@ -13,9 +13,12 @@ http://trevorjim.com/papers/ldta-2009.pdf
 
 module Control.Lens.Grammar.Matching
   ( Matching (..)
+  , Transducer (..)
+  , TransducerState (..)
+  , compileTransducer
   , languageGen
-  , expected
-  , unreachableRules
+  , expectedGen
+  , unreachableGen
   ) where
 
 import Control.Lens
@@ -39,26 +42,26 @@ class Matching word pattern | pattern -> word where
   infix 2 =~
 
 -- | A state in the Earley-extended Thompson transducer for a `Bnf`.
--- @EarleyTerminal cls ds@ matches on a token class and transitions to @ds@.
--- @EarleyNonTerminal name ds@ is a call point for rule @name@; after @name@
--- completes, control flows to @ds@. @EarleyEmit name@ is the final state
+-- @TransducerTokenClass cls ds@ matches on a token class and transitions to @ds@.
+-- @TransducerNonTerminal name ds@ is a call point for rule @name@; after @name@
+-- completes, control flows to @ds@. @TransducerEmit name@ is the final state
 -- for rule @name@ and triggers completion during Earley closure.
-data EarleyState token
-  = EarleyTerminal (TokenClass token) IntSet
-  | EarleyNonTerminal String IntSet
-  | EarleyEmit String
-
-data EarleyTransducer token = EarleyTransducer
-  { earleyStates :: IntMap (EarleyState token)
-  , earleyRules :: Map String (IntSet, Bool)
-  , earleyAcceptId :: Int
-  , earleyStartStates :: IntSet
+data TransducerState token
+  = TransducerTokenClass (TokenClass token) IntSet
+  | TransducerNonTerminal String IntSet
+  | TransducerEmit String
+
+data Transducer token = Transducer
+  { transducerStates :: IntMap (TransducerState token)
+  , transducerRules :: Map String (IntSet, Bool)
+  , transducerAcceptId :: Int
+  , transducerStartStates :: IntSet
   }
 
-compileEarley :: Bnf (RegEx token) -> EarleyTransducer token
-compileEarley (Bnf start rules) = EarleyTransducer
-  { earleyStates = IntMap.fromList allStates
-  , earleyRules = Map.fromList
+compileTransducer :: Bnf (RegEx token) -> Transducer token
+compileTransducer (Bnf start rules) = Transducer
+  { transducerStates = IntMap.fromList allStates
+  , transducerRules = Map.fromList
       [ ( n
         , ( Map.findWithDefault IntSet.empty n firstsMap
           , Map.findWithDefault False n nullMap
@@ -66,8 +69,8 @@ compileEarley (Bnf start rules) = EarleyTransducer
         )
       | n <- Map.keys ruleMap
       ]
-  , earleyAcceptId = earleyAcceptId0
-  , earleyStartStates = startStates
+  , transducerAcceptId = transducerAcceptId0
+  , transducerStartStates = startStates
   }
 
   where
@@ -95,13 +98,13 @@ compileEarley (Bnf start rules) = EarleyTransducer
 
     ruleNames = Map.keys ruleMap
 
-    earleyAcceptId0 = 0
+    transducerAcceptId0 = 0
 
     (finalMap, nextIdAfterFinals) =
-      foldl' alloc (Map.empty, earleyAcceptId0 + 1) ruleNames
+      foldl' alloc (Map.empty, transducerAcceptId0 + 1) ruleNames
       where alloc (m, i) n = (Map.insert n i m, i + 1)
 
-    finalStatesList = [(finalMap Map.! n, EarleyEmit n) | n <- ruleNames]
+    finalStatesList = [(finalMap Map.! n, TransducerEmit n) | n <- ruleNames]
 
     (rulesStatesList, firstsMap, nextIdAfterRules) =
       foldl' compileRule ([], Map.empty, nextIdAfterFinals) (Map.toList ruleMap)
@@ -117,10 +120,10 @@ compileEarley (Bnf start rules) = EarleyTransducer
           in (sts <> newSts, Map.insert name newFirsts fm, nid')
 
     (startFirsts, startStatesRaw, _, startBypass) =
-      goEarley start nextIdAfterRules (IntSet.singleton earleyAcceptId0)
+      goEarley start nextIdAfterRules (IntSet.singleton transducerAcceptId0)
 
     startStates =
-      startFirsts <> bypassStates startBypass (IntSet.singleton earleyAcceptId0)
+      startFirsts <> bypassStates startBypass (IntSet.singleton transducerAcceptId0)
 
     allStates = finalStatesList <> rulesStatesList <> startStatesRaw
 
@@ -131,7 +134,7 @@ compileEarley (Bnf start rules) = EarleyTransducer
         SeqEmpty -> (IntSet.empty, [], nextId, True)
         NonTerminal name ->
           ( IntSet.singleton nextId
-          , [(nextId, EarleyNonTerminal name dests)]
+          , [(nextId, TransducerNonTerminal name dests)]
           , nextId + 1
           , Map.findWithDefault False name nullMap
           )
@@ -165,13 +168,13 @@ compileEarley (Bnf start rules) = EarleyTransducer
           | Set.null chars -> (IntSet.empty, [], nextId, False)
           | otherwise ->
               ( IntSet.singleton nextId
-              , [(nextId, EarleyTerminal (TokenClass (OneOf chars)) dests)]
+              , [(nextId, TransducerTokenClass (TokenClass (OneOf chars)) dests)]
               , nextId + 1
               , False
               )
         RegExam (NotOneOf chars catTest) ->
           ( IntSet.singleton nextId
-          , [(nextId, EarleyTerminal (TokenClass (NotOneOf chars catTest)) dests)]
+          , [(nextId, TransducerTokenClass (TokenClass (NotOneOf chars catTest)) dests)]
           , nextId + 1
           , False
           )
@@ -186,16 +189,12 @@ compileEarley (Bnf start rules) = EarleyTransducer
             , bypass0 || bypass1
             )
 
-matchEarley :: Categorized token => [token] -> EarleyTransducer token -> Bool
-matchEarley word et = acceptsChart n chart et
-  where (n, chart) = runEarleyPrefix word et
-
-runEarleyPrefix
+prefixGen
   :: Categorized token
   => [token]
-  -> EarleyTransducer token
+  -> Transducer token
   -> (Int, IntMap (IntMap IntSet))
-runEarleyPrefix word et = go 0 (initialChart et) word
+prefixGen word et = go 0 (initialChart et) word
   where
     go j ss [] = (j, ss)
     go j ss (x : xs) =
@@ -206,8 +205,8 @@ runEarleyPrefix word et = go 0 (initialChart et) word
     scanFrom j input ss = IntMap.foldrWithKey advance IntMap.empty e_j
       where
         e_j = IntMap.findWithDefault IntMap.empty j ss
-        advance s origs acc = case IntMap.lookup s (earleyStates et) of
-          Just (EarleyTerminal cls ds) | tokenClass cls input ->
+        advance s origs acc = case IntMap.lookup s (transducerStates et) of
+          Just (TransducerTokenClass cls ds) | tokenClass cls input ->
             IntSet.foldr
               (\d -> IntMap.insertWith IntSet.union d origs) acc ds
           _ -> acc
@@ -218,25 +217,23 @@ according to the grammar. An empty result means the prefix is a dead end —
 no extension can ever be accepted. Useful for autocomplete and for
 \"expected one of …\" parse errors.
 -}
-expected
+expectedGen
   :: Categorized token
-  => [token] -> Bnf (RegEx token) -> [TokenClass token]
-expected word bnf = map fst (scanClassOptions n chart et)
+  => [token] -> Transducer token -> [TokenClass token]
+expectedGen word et = map fst (scanClassOptions n chart et)
   where
-    et = compileEarley bnf
-    (n, chart) = runEarleyPrefix word et
+    (n, chart) = prefixGen word et
 
 {- |
 Rule names declared in the `Bnf` that can never be entered from the start
 expression — dead productions. A non-empty result is a grammar-hygiene
 warning: those rules can be deleted without changing the recognized language.
 -}
-unreachableRules :: Bnf (RegEx token) -> Set String
-unreachableRules bnf =
-  Map.keysSet (earleyRules et) `Set.difference` called
+unreachableGen :: Transducer token -> Set String
+unreachableGen et =
+  Map.keysSet (transducerRules et) `Set.difference` called
   where
-    et = compileEarley bnf
-    called = bfs (earleyStartStates et) IntSet.empty Set.empty
+    called = bfs (transducerStartStates et) IntSet.empty Set.empty
 
     bfs frontier seen calls
       | IntSet.null fresh = calls
@@ -245,17 +242,20 @@ unreachableRules bnf =
         fresh = IntSet.difference frontier seen
         (next, calls') = IntSet.foldr step (IntSet.empty, calls) fresh
 
-    step s (acc, cs) = case IntMap.lookup s (earleyStates et) of
-      Just (EarleyTerminal _ ds) -> (acc <> ds, cs)
-      Just (EarleyNonTerminal name ds) ->
-        let firsts = maybe IntSet.empty fst (Map.lookup name (earleyRules et))
+    step s (acc, cs) = case IntMap.lookup s (transducerStates et) of
+      Just (TransducerTokenClass _ ds) -> (acc <> ds, cs)
+      Just (TransducerNonTerminal name ds) ->
+        let firsts = maybe IntSet.empty fst (Map.lookup name (transducerRules et))
         in (acc <> ds <> firsts, Set.insert name cs)
-      Just (EarleyEmit _) -> (acc, cs)
+      Just (TransducerEmit _) -> (acc, cs)
       Nothing -> (acc, cs)
 -- instances
 instance Categorized token
   => Matching [token] (Bnf (RegEx token)) where
-    word =~ bnf = matchEarley word (compileEarley bnf)
+    word =~ bnf = acceptsChart n chart et
+      where
+        et = compileTransducer bnf
+        (n, chart) = prefixGen word et
 instance Categorized token
   => Matching [token] (RegEx token) where
     word =~ pattern = word =~ liftBnf0 pattern
@@ -270,11 +270,10 @@ random but always valid for the selected terminal class.
 -}
 languageGen
   :: (Applicative f, TokenAlgebra token (f token))
-  => Bnf (RegEx token)
+  => Transducer token
   -> f [[token]]
-languageGen bnf = sequenceA (fmap sampleWord classWords)
+languageGen et = sequenceA (fmap sampleWord classWords)
   where
-    et = compileEarley bnf
 
     classWords = enumerateByLength [(0, [], initialChart et)] Set.empty
 
@@ -304,28 +303,28 @@ languageGen bnf = sequenceA (fmap sampleWord classWords)
       ]
 
 initialChart
-  :: EarleyTransducer token
+  :: Transducer token
   -> IntMap (IntMap IntSet)
 initialChart et = closeChartAt 0 (IntMap.singleton 0 initialE0) et
   where
     initialE0 = IntMap.fromList
-      [ (s, IntSet.singleton 0) | s <- IntSet.toList (earleyStartStates et) ]
+      [ (s, IntSet.singleton 0) | s <- IntSet.toList (transducerStartStates et) ]
 
 acceptsChart
   :: Int
   -> IntMap (IntMap IntSet)
-  -> EarleyTransducer token
+  -> Transducer token
   -> Bool
 acceptsChart j chart et = IntSet.member 0 acceptOrigins
   where
     e_j = IntMap.findWithDefault IntMap.empty j chart
-    acceptOrigins = IntMap.findWithDefault IntSet.empty (earleyAcceptId et) e_j
+    acceptOrigins = IntMap.findWithDefault IntSet.empty (transducerAcceptId et) e_j
 
 scanClassOptions
   :: Categorized token
   => Int
   -> IntMap (IntMap IntSet)
-  -> EarleyTransducer token
+  -> Transducer token
   -> [(TokenClass token, IntMap (IntMap IntSet))]
 scanClassOptions j chart et =
   [ (cls, closeChartAt (j + 1) (IntMap.insert (j + 1) scanned chart) et)
@@ -335,20 +334,18 @@ scanClassOptions j chart et =
     grouped = IntMap.foldrWithKey advance Map.empty e_j
     e_j = IntMap.findWithDefault IntMap.empty j chart
 
-    advance s origs acc = case IntMap.lookup s (earleyStates et) of
-      Just (EarleyTerminal cls ds) ->
-        Map.insertWith mergeEarleySet cls scanned acc
+    advance s origs acc = case IntMap.lookup s (transducerStates et) of
+      Just (TransducerTokenClass cls ds) ->
+        Map.insertWith (IntMap.unionWith IntSet.union) cls scanned acc
         where
           scanned = IntSet.foldr
             (\d -> IntMap.insertWith IntSet.union d origs) IntMap.empty ds
       _ -> acc
 
-    mergeEarleySet = IntMap.unionWith IntSet.union
-
 closeChartAt
   :: Int
   -> IntMap (IntMap IntSet)
-  -> EarleyTransducer token
+  -> Transducer token
   -> IntMap (IntMap IntSet)
 closeChartAt j initialChart0 et = loop initialWork initialChart0
   where
@@ -357,23 +354,23 @@ closeChartAt j initialChart0 et = loop initialWork initialChart0
       [ (s, i) | (s, os) <- IntMap.toList initialE, i <- IntSet.toList os ]
 
     loop [] chart = chart
-    loop ((s, i) : rest) chart = case IntMap.lookup s (earleyStates et) of
-      Just (EarleyNonTerminal name ds) ->
+    loop ((s, i) : rest) chart = case IntMap.lookup s (transducerStates et) of
+      Just (TransducerNonTerminal name ds) ->
         let
           (firsts, isNull) = Map.findWithDefault
-            (IntSet.empty, False) name (earleyRules et)
+            (IntSet.empty, False) name (transducerRules et)
           predItems = [(f, j) | f <- IntSet.toList firsts]
           nullItems =
             if isNull then [(d, i) | d <- IntSet.toList ds] else []
           (chart', new) = addEarleyItems (predItems <> nullItems) chart
         in loop (new <> rest) chart'
-      Just (EarleyEmit name) ->
+      Just (TransducerEmit name) ->
         let
           e_i = IntMap.findWithDefault IntMap.empty i chart
           completions =
             [ (d, i')
             | (t, os) <- IntMap.toList e_i
-            , Just (EarleyNonTerminal n' ds) <- [IntMap.lookup t (earleyStates et)]
+            , Just (TransducerNonTerminal n' ds) <- [IntMap.lookup t (transducerStates et)]
             , n' == name
             , i' <- IntSet.toList os
             , d <- IntSet.toList ds

From 11d2439cddaedca344c1577fea60a9b87ca9128e Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 22:26:36 -0700
Subject: [PATCH 12/31] machine head

---
 distributors.cabal                            |   2 +-
 src/Control/Lens/Grammar.hs                   |   4 +-
 .../Lens/Grammar/{Matching.hs => Machine.hs}  | 211 +++++++++++++-----
 src/Data/Profunctor/Grammar.hs                |   2 +-
 4 files changed, 154 insertions(+), 65 deletions(-)
 rename src/Control/Lens/Grammar/{Matching.hs => Machine.hs} (60%)

diff --git a/distributors.cabal b/distributors.cabal
index 45b73833..c620f938 100644
--- a/distributors.cabal
+++ b/distributors.cabal
@@ -36,7 +36,7 @@ library
       Control.Lens.Grammar.Internal.NestedPrismTH
       Control.Lens.Grammar.Internal.Orphanage
       Control.Lens.Grammar.Kleene
-      Control.Lens.Grammar.Matching
+      Control.Lens.Grammar.Machine
       Control.Lens.Grammar.Symbol
       Control.Lens.Grammar.Token
       Control.Lens.Grate
diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index fa08b090..80c23c6f 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -46,7 +46,7 @@ import Control.Lens.PartialIso
 import Control.Lens.Grammar.BackusNaur
 import Control.Lens.Grammar.Boole
 import Control.Lens.Grammar.Kleene
-import Control.Lens.Grammar.Matching
+import Control.Lens.Grammar.Machine
 import Control.Lens.Grammar.Token
 import Control.Lens.Grammar.Symbol
 import Data.Bifunctor.Joker
@@ -69,7 +69,7 @@ import Witherable
 import Control.Lens.Grammar.BackusNaur as X
 import Control.Lens.Grammar.Boole as X
 import Control.Lens.Grammar.Kleene as X
-import Control.Lens.Grammar.Matching as X
+import Control.Lens.Grammar.Machine as X
 import Control.Lens.Grammar.Symbol as X
 import Control.Lens.Grammar.Token as X
 import Control.Lens.PartialIso as X
diff --git a/src/Control/Lens/Grammar/Matching.hs b/src/Control/Lens/Grammar/Machine.hs
similarity index 60%
rename from src/Control/Lens/Grammar/Matching.hs
rename to src/Control/Lens/Grammar/Machine.hs
index 2aae09a6..54bab061 100644
--- a/src/Control/Lens/Grammar/Matching.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -1,21 +1,51 @@
 {- |
-Module      : Control.Lens.Grammar.Matching
-Description : pattern matching & language generation
+Module      : Control.Lens.Grammar.Machine
+Description : finite-machine compilation and Earley-style matching
 Copyright   : (C) 2026 - Eitan Chatav
 License     : BSD-style (see the file LICENSE)
 Maintainer  : Eitan Chatav <eitan.chatav@gmail.com>
 Stability   : provisional
 Portability : non-portable
 
-https://www.cs.dartmouth.edu/~doug/nfa.pdf
-http://trevorjim.com/papers/ldta-2009.pdf
+This module presents a machine-oriented view of grammars:
+
+1. Compile a grammar with regular right-hand sides into a finite control machine
+   (`Transducer`) using a Thompson-style construction over regular expressions.
+2. Run Earley-style chart recognition over that machine.
+3. Derive practical products from the same engine: boolean matching,
+   "expected token class" reporting, language generation, and dead-rule analysis.
+
+The implementation follows mainstream Earley terminology (predict/scan/complete)
+while adopting the transducer perspective used by Jim and Mandelbaum:
+
+* Earley item `(q,i)` is represented as "machine state id @q@ with origin @i@"
+  stored in chart set @E_j@.
+* `closeChartAt` performs predict and complete to a fixed point at position @j@.
+* `scanFrom`/`scanClassOptions` perform scanner steps to build @E_{j+1}@.
+* `TransducerEmit` provides the "completed nonterminal" signal used by complete.
+
+References:
+
+* Trevor Jim and Yitzhak Mandelbaum, /Efficient Earley Parsing with Regular
+  Right-hand Sides/ (LDTA 2009).
+* Thompson-style NFA compilation accounts from compiler literature.
+* Standard Earley algorithm presentations (state sets, origin indices,
+  predictor/scanner/completer).
 -}
 
-module Control.Lens.Grammar.Matching
-  ( Matching (..)
+module Control.Lens.Grammar.Machine
+  (
+    -- * Matching Interface
+    Matching (..)
+
+    -- * Machine Representation
   , Transducer (..)
   , TransducerState (..)
+
+    -- * Compilation
   , compileTransducer
+
+    -- * Machine Execution Utilities
   , languageGen
   , expectedGen
   , unreachableGen
@@ -37,27 +67,73 @@ import qualified Data.Set as Set
 import Data.Set (Set)
 
 -- | Does a word match a pattern?
+--
+-- For grammar and regular-expression patterns in this package, matching is
+-- performed by chart recognition over a compiled machine (`Transducer`).
 class Matching word pattern | pattern -> word where
   (=~) :: word -> pattern -> Bool
   infix 2 =~
 
--- | A state in the Earley-extended Thompson transducer for a `Bnf`.
--- @TransducerTokenClass cls ds@ matches on a token class and transitions to @ds@.
--- @TransducerNonTerminal name ds@ is a call point for rule @name@; after @name@
--- completes, control flows to @ds@. @TransducerEmit name@ is the final state
--- for rule @name@ and triggers completion during Earley closure.
+-- | A control state in the compiled parsing machine.
+--
+-- Read these constructors as machine instructions:
+--
+-- * `TransducerTokenClass` is a scanner transition over a terminal class.
+-- * `TransducerNonTerminal` is a call site for a nonterminal.
+-- * `TransducerEmit` is a completed nonterminal output used by completion.
+--
+-- This corresponds to the "finite control + call/return" view of parsing
+-- transducers in LDTA 2009.
 data TransducerState token
   = TransducerTokenClass (TokenClass token) IntSet
   | TransducerNonTerminal String IntSet
   | TransducerEmit String
 
+-- | Finite control machine used by the Earley-style recognizer.
+--
+-- Formal machine view (adapted for this parser):
+--
+-- @
+-- T = (Q, Σ, Γ, I, F, δ)
+-- @
+--
+-- where:
+--
+-- * @Q@ (control states) = keys of `transducerStates`.
+-- * @Σ@ (terminal alphabet) is represented intensionally by
+--   `TokenClass token` labels in `TransducerTokenClass` transitions.
+-- * @Γ@ (call alphabet / nonterminal symbols) = keys of `transducerRules`
+--   and names carried by `TransducerNonTerminal` / `TransducerEmit`.
+-- * @I@ (initial states) = `transducerStartStates`.
+-- * @F@ (accepting states) = singleton `{transducerAcceptId}` for recognition.
+-- * @δ@ (transition relation) is encoded by `transducerStates` constructors:
+--
+--   * `TransducerTokenClass cls ds` contributes terminal transitions on any
+--     token matching @cls@ from the current state to each state in @ds@.
+--   * `TransducerNonTerminal name ds` contributes call transitions on symbol
+--     @name@ with return destinations @ds@ (used by Earley predict/complete).
+--   * `TransducerEmit name` is a completion/output state for nonterminal @name@,
+--     consumed by Earley completion rather than terminal scanning.
+--
+-- `transducerRules` is auxiliary indexing for Earley closure (entry-state set and
+-- nullability per nonterminal), not an additional machine component.
 data Transducer token = Transducer
-  { transducerStates :: IntMap (TransducerState token)
-  , transducerRules :: Map String (IntSet, Bool)
-  , transducerAcceptId :: Int
-  , transducerStartStates :: IntSet
+  { transducerStates :: IntMap (TransducerState token) -- ^ @Q@
+  , transducerRules :: Map String (IntSet, Bool) -- ^ @Γ@
+  , transducerAcceptId :: Int -- ^ @F@
+  , transducerStartStates :: IntSet -- ^ @I@
   }
 
+-- | Compile a regular-right-side grammar into a parsing transducer.
+--
+-- Construction outline:
+--
+-- * Regular-expression fragments are lowered in Thompson style by `goEarley`.
+-- * Each nonterminal gets a distinguished emit/final state.
+-- * Concatenation/alternation/Kleene operators wire state sets and bypassability
+--   (nullability) as in regular-expression automata construction.
+-- * A fixed-point nullability analysis over the grammar enables null completion
+--   during Earley closure.
 compileTransducer :: Bnf (RegEx token) -> Transducer token
 compileTransducer (Bnf start rules) = Transducer
   { transducerStates = IntMap.fromList allStates
@@ -191,20 +267,20 @@ compileTransducer (Bnf start rules) = Transducer
 
 prefixGen
   :: Categorized token
-  => [token]
-  -> Transducer token
+  => Transducer token
+  -> [token]
   -> (Int, IntMap (IntMap IntSet))
-prefixGen word et = go 0 (initialChart et) word
+prefixGen et word = go 0 (initialChart et) word
   where
-    go j ss [] = (j, ss)
-    go j ss (x : xs) =
-      let scanned = scanFrom j x ss
-          closed = closeChartAt (j + 1) (IntMap.insert (j + 1) scanned ss) et
+    go j chart [] = (j, chart)
+    go j chart (x : xs) =
+      let scanned = scanFrom j x chart
+          closed = closeChartAt et (j + 1) (IntMap.insert (j + 1) scanned chart)
       in go (j + 1) closed xs
 
-    scanFrom j input ss = IntMap.foldrWithKey advance IntMap.empty e_j
+    scanFrom j input chart = IntMap.foldrWithKey advance IntMap.empty eJ
       where
-        e_j = IntMap.findWithDefault IntMap.empty j ss
+        eJ = IntMap.findWithDefault IntMap.empty j chart
         advance s origs acc = case IntMap.lookup s (transducerStates et) of
           Just (TransducerTokenClass cls ds) | tokenClass cls input ->
             IntSet.foldr
@@ -212,22 +288,27 @@ prefixGen word et = go 0 (initialChart et) word
           _ -> acc
 
 {- |
-Token classes that could legally appear next after the given input prefix,
-according to the grammar. An empty result means the prefix is a dead end —
-no extension can ever be accepted. Useful for autocomplete and for
-\"expected one of …\" parse errors.
+Earley scanner frontier summarized as token classes.
+
+Returns terminal classes that can be scanned next after the given input prefix.
+An empty result means the current chart has no scanner transitions, i.e. the
+prefix is a dead end for recognition.
+
+This is the machine-level version of "what terminals are expected next?".
 -}
 expectedGen
   :: Categorized token
-  => [token] -> Transducer token -> [TokenClass token]
-expectedGen word et = map fst (scanClassOptions n chart et)
+  => Transducer token -> [token] -> [TokenClass token]
+expectedGen et word = map fst (scanClassOptions et n chart)
   where
-    (n, chart) = prefixGen word et
+    (n, chart) = prefixGen et word
 
 {- |
 Rule names declared in the `Bnf` that can never be entered from the start
 expression — dead productions. A non-empty result is a grammar-hygiene
 warning: those rules can be deleted without changing the recognized language.
+
+Operationally, this is reachability over control states plus nonterminal calls.
 -}
 unreachableGen :: Transducer token -> Set String
 unreachableGen et =
@@ -252,10 +333,10 @@ unreachableGen et =
 -- instances
 instance Categorized token
   => Matching [token] (Bnf (RegEx token)) where
-    word =~ bnf = acceptsChart n chart et
+    word =~ bnf = acceptsChart et n chart
       where
         et = compileTransducer bnf
-        (n, chart) = prefixGen word et
+        (n, chart) = prefixGen et word
 instance Categorized token
   => Matching [token] (RegEx token) where
     word =~ pattern = word =~ liftBnf0 pattern
@@ -263,10 +344,14 @@ instance Matching s (APrism s t a b) where
   word =~ pattern = is pattern word
 
 {- |
-Generate words recognized by a `Bnf` using Earley chart progression.
+Generate words recognized by a grammar machine using chart progression.
+
+The algorithm performs a breadth-first exploration over scanner frontiers derived
+from Earley sets, so words are produced by nondecreasing length.
 
-Chart/state progression is deterministic (state id order). Token realization is
-random but always valid for the selected terminal class.
+Chart/state progression is deterministic (state id order). Token realization uses
+`TokenAlgebra` and may be nondeterministic, but is always valid for the chosen
+terminal class.
 -}
 languageGen
   :: (Applicative f, TokenAlgebra token (f token))
@@ -291,7 +376,7 @@ languageGen et = sequenceA (fmap sampleWord classWords)
       in (reverse acceptedRev, seen')
       where
         step (acc, seen) (j, revWord, chart)
-          | acceptsChart j chart et =
+          | acceptsChart et j chart =
               if Set.member revWord seen
                 then (acc, seen)
                 else (revWord : acc, Set.insert revWord seen)
@@ -299,40 +384,43 @@ languageGen et = sequenceA (fmap sampleWord classWords)
 
     expand (j, revWord, chart) =
       [ (j + 1, cls : revWord, nextChart)
-      | (cls, nextChart) <- scanClassOptions j chart et
+      | (cls, nextChart) <- scanClassOptions et j chart
       ]
 
 initialChart
   :: Transducer token
   -> IntMap (IntMap IntSet)
-initialChart et = closeChartAt 0 (IntMap.singleton 0 initialE0) et
+initialChart et = closeChartAt et 0 (IntMap.singleton 0 initialE0)
   where
     initialE0 = IntMap.fromList
       [ (s, IntSet.singleton 0) | s <- IntSet.toList (transducerStartStates et) ]
 
+-- Accept iff (q_accept, 0) is in E_n.
 acceptsChart
-  :: Int
+  :: Transducer token
+  -> Int
   -> IntMap (IntMap IntSet)
-  -> Transducer token
   -> Bool
-acceptsChart j chart et = IntSet.member 0 acceptOrigins
+acceptsChart et j chart = IntSet.member 0 acceptOrigins
   where
-    e_j = IntMap.findWithDefault IntMap.empty j chart
-    acceptOrigins = IntMap.findWithDefault IntSet.empty (transducerAcceptId et) e_j
+    eJ = IntMap.findWithDefault IntMap.empty j chart
+    acceptOrigins = IntMap.findWithDefault IntSet.empty (transducerAcceptId et) eJ
 
+-- Group all scanner moves from E_j by token class; each result also carries the
+-- closed successor chart at j+1.
 scanClassOptions
-  :: Categorized token
+  :: Transducer token
+  -> Categorized token
   => Int
   -> IntMap (IntMap IntSet)
-  -> Transducer token
   -> [(TokenClass token, IntMap (IntMap IntSet))]
-scanClassOptions j chart et =
-  [ (cls, closeChartAt (j + 1) (IntMap.insert (j + 1) scanned chart) et)
+scanClassOptions et j chart =
+  [ (cls, closeChartAt et (j + 1) (IntMap.insert (j + 1) scanned chart))
   | (cls, scanned) <- Map.toAscList grouped
   ]
   where
-    grouped = IntMap.foldrWithKey advance Map.empty e_j
-    e_j = IntMap.findWithDefault IntMap.empty j chart
+    grouped = IntMap.foldrWithKey advance Map.empty eJ
+    eJ = IntMap.findWithDefault IntMap.empty j chart
 
     advance s origs acc = case IntMap.lookup s (transducerStates et) of
       Just (TransducerTokenClass cls ds) ->
@@ -343,16 +431,17 @@ scanClassOptions j chart et =
       _ -> acc
 
 closeChartAt
-  :: Int
+  :: Transducer token
+  -> Int
   -> IntMap (IntMap IntSet)
-  -> Transducer token
   -> IntMap (IntMap IntSet)
-closeChartAt j initialChart0 et = loop initialWork initialChart0
+closeChartAt et j initialChart0 = loop initialWork initialChart0
   where
-    initialE = IntMap.findWithDefault IntMap.empty j initialChart0
+    initialEJ = IntMap.findWithDefault IntMap.empty j initialChart0
     initialWork =
-      [ (s, i) | (s, os) <- IntMap.toList initialE, i <- IntSet.toList os ]
+      [ (s, i) | (s, os) <- IntMap.toList initialEJ, i <- IntSet.toList os ]
 
+    -- Earley closure at E_j: apply predict/complete until fixed point.
     loop [] chart = chart
     loop ((s, i) : rest) chart = case IntMap.lookup s (transducerStates et) of
       Just (TransducerNonTerminal name ds) ->
@@ -366,10 +455,10 @@ closeChartAt j initialChart0 et = loop initialWork initialChart0
         in loop (new <> rest) chart'
       Just (TransducerEmit name) ->
         let
-          e_i = IntMap.findWithDefault IntMap.empty i chart
+          eI = IntMap.findWithDefault IntMap.empty i chart
           completions =
             [ (d, i')
-            | (t, os) <- IntMap.toList e_i
+            | (t, os) <- IntMap.toList eI
             , Just (TransducerNonTerminal n' ds) <- [IntMap.lookup t (transducerStates et)]
             , n' == name
             , i' <- IntSet.toList os
@@ -383,12 +472,12 @@ closeChartAt j initialChart0 et = loop initialWork initialChart0
       where
         ins (acc, new) (state, origin) =
           let
-            e_j = IntMap.findWithDefault IntMap.empty j acc
-            os = IntMap.findWithDefault IntSet.empty state e_j
+            eJ = IntMap.findWithDefault IntMap.empty j acc
+            os = IntMap.findWithDefault IntSet.empty state eJ
           in if IntSet.member origin os
             then (acc, new)
             else
               let
-                e_j' = IntMap.insert state (IntSet.insert origin os) e_j
-                acc' = IntMap.insert j e_j' acc
+                eJ' = IntMap.insert state (IntSet.insert origin os) eJ
+                acc' = IntMap.insert j eJ' acc
               in (acc', (state, origin) : new)
diff --git a/src/Data/Profunctor/Grammar.hs b/src/Data/Profunctor/Grammar.hs
index 1eb2f03c..ee88cc91 100644
--- a/src/Data/Profunctor/Grammar.hs
+++ b/src/Data/Profunctor/Grammar.hs
@@ -27,7 +27,7 @@ import Control.Lens
 import Control.Lens.Extras
 import Control.Lens.Grammar.BackusNaur
 import Control.Lens.Grammar.Kleene
-import Control.Lens.Grammar.Matching
+import Control.Lens.Grammar.Machine
 import Control.Lens.Grammar.Symbol
 import Control.Lens.Grammar.Token
 import Control.Monad

From 650d5e4c216a2913ed722022fb76e75630be3d22 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 22:39:48 -0700
Subject: [PATCH 13/31] Update Machine.hs

---
 src/Control/Lens/Grammar/Machine.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index 54bab061..bb03e8ee 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -54,6 +54,7 @@ module Control.Lens.Grammar.Machine
 import Control.Lens
 import Control.Lens.Extras
 import Control.Lens.Grammar.BackusNaur
+import Control.Lens.Grammar.Boole
 import Control.Lens.Grammar.Kleene
 import Control.Lens.Grammar.Token
 import Data.Foldable
@@ -298,8 +299,8 @@ This is the machine-level version of "what terminals are expected next?".
 -}
 expectedGen
   :: Categorized token
-  => Transducer token -> [token] -> [TokenClass token]
-expectedGen et word = map fst (scanClassOptions et n chart)
+  => Transducer token -> [token] -> TokenClass token
+expectedGen et word = anyB fst (scanClassOptions et n chart)
   where
     (n, chart) = prefixGen et word
 
@@ -409,9 +410,9 @@ acceptsChart et j chart = IntSet.member 0 acceptOrigins
 -- Group all scanner moves from E_j by token class; each result also carries the
 -- closed successor chart at j+1.
 scanClassOptions
-  :: Transducer token
-  -> Categorized token
-  => Int
+  :: Categorized token
+  => Transducer token
+  -> Int
   -> IntMap (IntMap IntSet)
   -> [(TokenClass token, IntMap (IntMap IntSet))]
 scanClassOptions et j chart =

From 98baf121050c0cea4be9beb8626edcdcb6e8a091 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Wed, 22 Apr 2026 23:01:49 -0700
Subject: [PATCH 14/31] Update Machine.hs

---
 src/Control/Lens/Grammar/Machine.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index bb03e8ee..03f71e49 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -295,7 +295,7 @@ Returns terminal classes that can be scanned next after the given input prefix.
 An empty result means the current chart has no scanner transitions, i.e. the
 prefix is a dead end for recognition.
 
-This is the machine-level version of "what terminals are expected next?".
+This is the machine-level version of "what tokens are expected next?".
 -}
 expectedGen
   :: Categorized token

From 4a21af78812ad2bb3a428b1eb05dcbeb9e699e6b Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Thu, 23 Apr 2026 06:57:08 -0700
Subject: [PATCH 15/31] Update Machine.hs

---
 src/Control/Lens/Grammar/Machine.hs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index 03f71e49..b0323fcc 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -34,17 +34,13 @@ References:
 -}
 
 module Control.Lens.Grammar.Machine
-  (
-    -- * Matching Interface
+  ( -- * Matching Interface
     Matching (..)
-
     -- * Machine Representation
   , Transducer (..)
   , TransducerState (..)
-
     -- * Compilation
   , compileTransducer
-
     -- * Machine Execution Utilities
   , languageGen
   , expectedGen

From 3aa6039f417266702d6d5b07f1b9d152784c298f Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 09:41:53 -0700
Subject: [PATCH 16/31] cleaning

---
 src/Control/Lens/Grammar.hs         |   2 +-
 src/Control/Lens/Grammar/Machine.hs | 222 ++++++++++------------------
 2 files changed, 82 insertions(+), 142 deletions(-)

diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index 80c23c6f..852c9dd8 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -793,7 +793,7 @@ regbnfG :: Grammar Char a -> RegBnf
 regbnfG bnf = runGrammor bnf
 
 languageG :: (Applicative f, TokenAlgebra token (f token)) => Grammar token a -> f [[token]]
-languageG bnf = languageGen (compileTransducer (runGrammor bnf))
+languageG bnf = languageGen (transducer (runGrammor bnf))
 
 {- | `printG` generates a printer from a `CtxGrammar`.
 Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s,
diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index b0323fcc..c90eeda9 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -1,49 +1,24 @@
 {- |
 Module      : Control.Lens.Grammar.Machine
-Description : finite-machine compilation and Earley-style matching
+Description : matching & transducers
 Copyright   : (C) 2026 - Eitan Chatav
 License     : BSD-style (see the file LICENSE)
 Maintainer  : Eitan Chatav <eitan.chatav@gmail.com>
 Stability   : provisional
 Portability : non-portable
 
-This module presents a machine-oriented view of grammars:
-
-1. Compile a grammar with regular right-hand sides into a finite control machine
-   (`Transducer`) using a Thompson-style construction over regular expressions.
-2. Run Earley-style chart recognition over that machine.
-3. Derive practical products from the same engine: boolean matching,
-   "expected token class" reporting, language generation, and dead-rule analysis.
-
-The implementation follows mainstream Earley terminology (predict/scan/complete)
-while adopting the transducer perspective used by Jim and Mandelbaum:
-
-* Earley item `(q,i)` is represented as "machine state id @q@ with origin @i@"
-  stored in chart set @E_j@.
-* `closeChartAt` performs predict and complete to a fixed point at position @j@.
-* `scanFrom`/`scanClassOptions` perform scanner steps to build @E_{j+1}@.
-* `TransducerEmit` provides the "completed nonterminal" signal used by complete.
-
-References:
-
-* Trevor Jim and Yitzhak Mandelbaum, /Efficient Earley Parsing with Regular
-  Right-hand Sides/ (LDTA 2009).
-* Thompson-style NFA compilation accounts from compiler literature.
-* Standard Earley algorithm presentations (state sets, origin indices,
-  predictor/scanner/completer).
+See 
 -}
 
 module Control.Lens.Grammar.Machine
-  ( -- * Matching Interface
+  ( -- * Matching
     Matching (..)
-    -- * Machine Representation
+    -- * Transducer
   , Transducer (..)
-  , TransducerState (..)
-    -- * Compilation
-  , compileTransducer
-    -- * Machine Execution Utilities
-  , languageGen
+  , TransducerStep (..)
+  , transducer
   , expectedGen
+  , languageGen
   , unreachableGen
   ) where
 
@@ -64,76 +39,46 @@ import qualified Data.Set as Set
 import Data.Set (Set)
 
 -- | Does a word match a pattern?
---
--- For grammar and regular-expression patterns in this package, matching is
--- performed by chart recognition over a compiled machine (`Transducer`).
 class Matching word pattern | pattern -> word where
   (=~) :: word -> pattern -> Bool
   infix 2 =~
 
--- | A control state in the compiled parsing machine.
---
--- Read these constructors as machine instructions:
---
--- * `TransducerTokenClass` is a scanner transition over a terminal class.
--- * `TransducerNonTerminal` is a call site for a nonterminal.
--- * `TransducerEmit` is a completed nonterminal output used by completion.
---
--- This corresponds to the "finite control + call/return" view of parsing
--- transducers in LDTA 2009.
-data TransducerState token
-  = TransducerTokenClass (TokenClass token) IntSet
-  | TransducerNonTerminal String IntSet
-  | TransducerEmit String
-
--- | Finite control machine used by the Earley-style recognizer.
---
--- Formal machine view (adapted for this parser):
---
--- @
--- T = (Q, Σ, Γ, I, F, δ)
--- @
---
--- where:
---
--- * @Q@ (control states) = keys of `transducerStates`.
--- * @Σ@ (terminal alphabet) is represented intensionally by
---   `TokenClass token` labels in `TransducerTokenClass` transitions.
--- * @Γ@ (call alphabet / nonterminal symbols) = keys of `transducerRules`
---   and names carried by `TransducerNonTerminal` / `TransducerEmit`.
--- * @I@ (initial states) = `transducerStartStates`.
--- * @F@ (accepting states) = singleton `{transducerAcceptId}` for recognition.
--- * @δ@ (transition relation) is encoded by `transducerStates` constructors:
---
---   * `TransducerTokenClass cls ds` contributes terminal transitions on any
---     token matching @cls@ from the current state to each state in @ds@.
---   * `TransducerNonTerminal name ds` contributes call transitions on symbol
---     @name@ with return destinations @ds@ (used by Earley predict/complete).
---   * `TransducerEmit name` is a completion/output state for nonterminal @name@,
---     consumed by Earley completion rather than terminal scanning.
---
--- `transducerRules` is auxiliary indexing for Earley closure (entry-state set and
--- nullability per nonterminal), not an additional machine component.
+{-| A `Transducer` is a tuple
+
+@
+T = (Σ, Δ, Q, I ⊆ Q, F ∈ Q, transition ⊆ Q × (Σ ∪ ∆) × Q, output ⊆ Q × ∆)
+@
+
+* @Σ@ is a (possibly infinite) set of terminal token classes, represented by `TokenClass`es.
+* @Δ@ is a finite set of nonterminals, represented by the key set of `transducerRules`.
+* @Q@ is a set of states, which is represented by the key set of `transducerRelations`.
+* @I@ are initial states represented by `transducerStarts`.
+* @F@ is a final state represented by @0@.
+* @transition@ is a relation represented by `transducerRelations`
+  with `TransitionTokenClass` and `TransitionNonTerminal` transitions.
+* @output@ is a relation represented by `transducerRelations` with `EmitNonTerminal` outputs.
+-}
 data Transducer token = Transducer
-  { transducerStates :: IntMap (TransducerState token) -- ^ @Q@
-  , transducerRules :: Map String (IntSet, Bool) -- ^ @Γ@
-  , transducerAcceptId :: Int -- ^ @F@
-  , transducerStartStates :: IntSet -- ^ @I@
+  { transducerRelations :: IntMap (TransducerStep token)
+  , transducerRules :: Map String (IntSet, Bool)
+  -- ^ an index into `transducerRelations` for nonterminals with precomputed nullability
+  , transducerStarts :: IntSet
   }
 
--- | Compile a regular-right-side grammar into a parsing transducer.
---
--- Construction outline:
---
--- * Regular-expression fragments are lowered in Thompson style by `goEarley`.
--- * Each nonterminal gets a distinguished emit/final state.
--- * Concatenation/alternation/Kleene operators wire state sets and bypassability
---   (nullability) as in regular-expression automata construction.
--- * A fixed-point nullability analysis over the grammar enables null completion
---   during Earley closure.
-compileTransducer :: Bnf (RegEx token) -> Transducer token
-compileTransducer (Bnf start rules) = Transducer
-  { transducerStates = IntMap.fromList allStates
+-- | A `TransducerStep` in a `Transducer`.
+data TransducerStep token
+  = TransitionTokenClass (TokenClass token) IntSet
+  | TransitionNonTerminal String IntSet
+  | EmitNonTerminal String
+
+{- | Compile a `RegEx`tended `Bnf` into a `Transducer`,
+using a combination of Thompson's algorithm for regular expressions
+and Earley's algorithm for context-free grammars. See Jim & Mandelbaum,
+[Efficient Earley Parsing with Regular Right-hand Sides](http://trevorjim.com/papers/ldta-2009.pdf).
+-}
+transducer :: Bnf (RegEx token) -> Transducer token
+transducer (Bnf start rules) = Transducer
+  { transducerRelations = IntMap.fromList allStates
   , transducerRules = Map.fromList
       [ ( n
         , ( Map.findWithDefault IntSet.empty n firstsMap
@@ -142,8 +87,7 @@ compileTransducer (Bnf start rules) = Transducer
         )
       | n <- Map.keys ruleMap
       ]
-  , transducerAcceptId = transducerAcceptId0
-  , transducerStartStates = startStates
+  , transducerStarts = startStates
   }
 
   where
@@ -177,7 +121,7 @@ compileTransducer (Bnf start rules) = Transducer
       foldl' alloc (Map.empty, transducerAcceptId0 + 1) ruleNames
       where alloc (m, i) n = (Map.insert n i m, i + 1)
 
-    finalStatesList = [(finalMap Map.! n, TransducerEmit n) | n <- ruleNames]
+    finalStatesList = [(finalMap Map.! n, EmitNonTerminal n) | n <- ruleNames]
 
     (rulesStatesList, firstsMap, nextIdAfterRules) =
       foldl' compileRule ([], Map.empty, nextIdAfterFinals) (Map.toList ruleMap)
@@ -188,12 +132,12 @@ compileTransducer (Bnf start rules) = Transducer
                 foldl' compileProd ([], IntSet.empty, nid) prods
               compileProd (s, fs, i) prod =
                 let (f, st, i', _) =
-                      goEarley prod i (IntSet.singleton finalId)
+                      thompson prod i (IntSet.singleton finalId)
                 in (s <> st, fs <> f, i')
           in (sts <> newSts, Map.insert name newFirsts fm, nid')
 
     (startFirsts, startStatesRaw, _, startBypass) =
-      goEarley start nextIdAfterRules (IntSet.singleton transducerAcceptId0)
+      thompson start nextIdAfterRules (IntSet.singleton transducerAcceptId0)
 
     startStates =
       startFirsts <> bypassStates startBypass (IntSet.singleton transducerAcceptId0)
@@ -203,19 +147,19 @@ compileTransducer (Bnf start rules) = Transducer
     bypassStates True = id
     bypassStates False = const IntSet.empty
 
-    goEarley rex nextId dests = case rex of
+    thompson rex nextId dests = case rex of
         SeqEmpty -> (IntSet.empty, [], nextId, True)
         NonTerminal name ->
           ( IntSet.singleton nextId
-          , [(nextId, TransducerNonTerminal name dests)]
+          , [(nextId, TransitionNonTerminal name dests)]
           , nextId + 1
           , Map.findWithDefault False name nullMap
           )
         Sequence rex0 rex1 ->
           let
-            (firsts1, states1, nextId1, bypass1) = goEarley rex1 nextId dests
+            (firsts1, states1, nextId1, bypass1) = thompson rex1 nextId dests
             (firsts0, states0, nextId0, bypass0) =
-              goEarley rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
+              thompson rex0 nextId1 (firsts1 <> bypassStates bypass1 dests)
           in
             ( firsts0 <> bypassStates bypass0 firsts1
             , states0 <> states1
@@ -224,37 +168,37 @@ compileTransducer (Bnf start rules) = Transducer
             )
         KleeneStar rex0 ->
           let
-            (firsts, states, nextId', _) = goEarley rex0 nextId (firsts <> dests)
+            (firsts, states, nextId', _) = thompson rex0 nextId (firsts <> dests)
           in
             (firsts, states, nextId', True)
         KleeneOpt rex0 ->
           let
-            (firsts, states, nextId', _) = goEarley rex0 nextId dests
+            (firsts, states, nextId', _) = thompson rex0 nextId dests
           in
             (firsts, states, nextId', True)
         KleenePlus rex0 ->
           let
-            (firsts, states, nextId', bypass) = goEarley rex0 nextId (firsts <> dests)
+            (firsts, states, nextId', bypass) = thompson rex0 nextId (firsts <> dests)
           in
             (firsts, states, nextId', bypass)
         RegExam (OneOf chars)
           | Set.null chars -> (IntSet.empty, [], nextId, False)
           | otherwise ->
               ( IntSet.singleton nextId
-              , [(nextId, TransducerTokenClass (TokenClass (OneOf chars)) dests)]
+              , [(nextId, TransitionTokenClass (TokenClass (OneOf chars)) dests)]
               , nextId + 1
               , False
               )
         RegExam (NotOneOf chars catTest) ->
           ( IntSet.singleton nextId
-          , [(nextId, TransducerTokenClass (TokenClass (NotOneOf chars catTest)) dests)]
+          , [(nextId, TransitionTokenClass (TokenClass (NotOneOf chars catTest)) dests)]
           , nextId + 1
           , False
           )
         RegExam (Alternate rex0 rex1) ->
           let
-            (firsts1, states1, nextId1, bypass1) = goEarley rex1 nextId dests
-            (firsts0, states0, nextId0, bypass0) = goEarley rex0 nextId1 dests
+            (firsts1, states1, nextId1, bypass1) = thompson rex1 nextId dests
+            (firsts0, states0, nextId0, bypass0) = thompson rex0 nextId1 dests
           in
             ( firsts0 <> firsts1
             , states0 <> states1
@@ -278,24 +222,21 @@ prefixGen et word = go 0 (initialChart et) word
     scanFrom j input chart = IntMap.foldrWithKey advance IntMap.empty eJ
       where
         eJ = IntMap.findWithDefault IntMap.empty j chart
-        advance s origs acc = case IntMap.lookup s (transducerStates et) of
-          Just (TransducerTokenClass cls ds) | tokenClass cls input ->
+        advance s origs acc = case IntMap.lookup s (transducerRelations et) of
+          Just (TransitionTokenClass cls ds) | tokenClass cls input ->
             IntSet.foldr
               (\d -> IntMap.insertWith IntSet.union d origs) acc ds
           _ -> acc
 
-{- |
-Earley scanner frontier summarized as token classes.
-
-Returns terminal classes that can be scanned next after the given input prefix.
-An empty result means the current chart has no scanner transitions, i.e. the
-prefix is a dead end for recognition.
-
-This is the machine-level version of "what tokens are expected next?".
+{- | What token is expected next?
+The scanner frontier, `expectedGen` returns `TokenClass`
+that can be scanned next after the given input prefix.
+A `falseB` result means the current chart has no scanner transitions,
+i.e. the prefix is a dead end for recognition.
 -}
 expectedGen
   :: Categorized token
-  => Transducer token -> [token] -> TokenClass token
+  => Transducer token -> [token] {- ^ prefix -} -> TokenClass token
 expectedGen et word = anyB fst (scanClassOptions et n chart)
   where
     (n, chart) = prefixGen et word
@@ -311,7 +252,7 @@ unreachableGen :: Transducer token -> Set String
 unreachableGen et =
   Map.keysSet (transducerRules et) `Set.difference` called
   where
-    called = bfs (transducerStartStates et) IntSet.empty Set.empty
+    called = bfs (transducerStarts et) IntSet.empty Set.empty
 
     bfs frontier seen calls
       | IntSet.null fresh = calls
@@ -320,19 +261,19 @@ unreachableGen et =
         fresh = IntSet.difference frontier seen
         (next, calls') = IntSet.foldr step (IntSet.empty, calls) fresh
 
-    step s (acc, cs) = case IntMap.lookup s (transducerStates et) of
-      Just (TransducerTokenClass _ ds) -> (acc <> ds, cs)
-      Just (TransducerNonTerminal name ds) ->
+    step s (acc, cs) = case IntMap.lookup s (transducerRelations et) of
+      Just (TransitionTokenClass _ ds) -> (acc <> ds, cs)
+      Just (TransitionNonTerminal name ds) ->
         let firsts = maybe IntSet.empty fst (Map.lookup name (transducerRules et))
         in (acc <> ds <> firsts, Set.insert name cs)
-      Just (TransducerEmit _) -> (acc, cs)
+      Just (EmitNonTerminal _) -> (acc, cs)
       Nothing -> (acc, cs)
 -- instances
 instance Categorized token
   => Matching [token] (Bnf (RegEx token)) where
-    word =~ bnf = acceptsChart et n chart
+    word =~ bnf = acceptsChart n chart
       where
-        et = compileTransducer bnf
+        et = transducer bnf
         (n, chart) = prefixGen et word
 instance Categorized token
   => Matching [token] (RegEx token) where
@@ -373,7 +314,7 @@ languageGen et = sequenceA (fmap sampleWord classWords)
       in (reverse acceptedRev, seen')
       where
         step (acc, seen) (j, revWord, chart)
-          | acceptsChart et j chart =
+          | acceptsChart j chart =
               if Set.member revWord seen
                 then (acc, seen)
                 else (revWord : acc, Set.insert revWord seen)
@@ -390,18 +331,17 @@ initialChart
 initialChart et = closeChartAt et 0 (IntMap.singleton 0 initialE0)
   where
     initialE0 = IntMap.fromList
-      [ (s, IntSet.singleton 0) | s <- IntSet.toList (transducerStartStates et) ]
+      [ (s, IntSet.singleton 0) | s <- IntSet.toList (transducerStarts et) ]
 
 -- Accept iff (q_accept, 0) is in E_n.
 acceptsChart
-  :: Transducer token
-  -> Int
+  :: Int
   -> IntMap (IntMap IntSet)
   -> Bool
-acceptsChart et j chart = IntSet.member 0 acceptOrigins
+acceptsChart j chart = IntSet.member 0 acceptOrigins
   where
     eJ = IntMap.findWithDefault IntMap.empty j chart
-    acceptOrigins = IntMap.findWithDefault IntSet.empty (transducerAcceptId et) eJ
+    acceptOrigins = IntMap.findWithDefault IntSet.empty 0 eJ
 
 -- Group all scanner moves from E_j by token class; each result also carries the
 -- closed successor chart at j+1.
@@ -419,8 +359,8 @@ scanClassOptions et j chart =
     grouped = IntMap.foldrWithKey advance Map.empty eJ
     eJ = IntMap.findWithDefault IntMap.empty j chart
 
-    advance s origs acc = case IntMap.lookup s (transducerStates et) of
-      Just (TransducerTokenClass cls ds) ->
+    advance s origs acc = case IntMap.lookup s (transducerRelations et) of
+      Just (TransitionTokenClass cls ds) ->
         Map.insertWith (IntMap.unionWith IntSet.union) cls scanned acc
         where
           scanned = IntSet.foldr
@@ -440,8 +380,8 @@ closeChartAt et j initialChart0 = loop initialWork initialChart0
 
     -- Earley closure at E_j: apply predict/complete until fixed point.
     loop [] chart = chart
-    loop ((s, i) : rest) chart = case IntMap.lookup s (transducerStates et) of
-      Just (TransducerNonTerminal name ds) ->
+    loop ((s, i) : rest) chart = case IntMap.lookup s (transducerRelations et) of
+      Just (TransitionNonTerminal name ds) ->
         let
           (firsts, isNull) = Map.findWithDefault
             (IntSet.empty, False) name (transducerRules et)
@@ -450,13 +390,13 @@ closeChartAt et j initialChart0 = loop initialWork initialChart0
             if isNull then [(d, i) | d <- IntSet.toList ds] else []
           (chart', new) = addEarleyItems (predItems <> nullItems) chart
         in loop (new <> rest) chart'
-      Just (TransducerEmit name) ->
+      Just (EmitNonTerminal name) ->
         let
           eI = IntMap.findWithDefault IntMap.empty i chart
           completions =
             [ (d, i')
             | (t, os) <- IntMap.toList eI
-            , Just (TransducerNonTerminal n' ds) <- [IntMap.lookup t (transducerStates et)]
+            , Just (TransitionNonTerminal n' ds) <- [IntMap.lookup t (transducerRelations et)]
             , n' == name
             , i' <- IntSet.toList os
             , d <- IntSet.toList ds

From b8c7b7868a9ea7cac74a7e479a61d4bca084b23a Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 10:56:24 -0700
Subject: [PATCH 17/31] cleaning

---
 src/Control/Lens/Grammar.hs         |  6 ++-
 src/Control/Lens/Grammar/Machine.hs | 83 +++++++++++++++--------------
 2 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index 852c9dd8..5b2b10e4 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -24,6 +24,7 @@ module Control.Lens.Grammar
   , regbnfG
   , regbnfGrammar
   , applicativeG
+  , transducerG
   , languageG
     -- * Context-sensitive grammar
   , CtxGrammar
@@ -792,8 +793,11 @@ It can apply to a `RegGrammar`.
 regbnfG :: Grammar Char a -> RegBnf
 regbnfG bnf = runGrammor bnf
 
+transducerG :: Categorized token => Grammar token a -> Transducer token
+transducerG bnf = transducer (runGrammor bnf)
+
 languageG :: (Applicative f, TokenAlgebra token (f token)) => Grammar token a -> f [[token]]
-languageG bnf = languageGen (transducer (runGrammor bnf))
+languageG bnf = languageRun (transducer (runGrammor bnf))
 
 {- | `printG` generates a printer from a `CtxGrammar`.
 Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s,
diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index c90eeda9..8f44f1f1 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -6,20 +6,18 @@ License     : BSD-style (see the file LICENSE)
 Maintainer  : Eitan Chatav <eitan.chatav@gmail.com>
 Stability   : provisional
 Portability : non-portable
-
-See 
 -}
 
 module Control.Lens.Grammar.Machine
   ( -- * Matching
     Matching (..)
     -- * Transducer
+  , transducer
+  , languageRun
+  , expectedRun
+  , unreachableRun
   , Transducer (..)
   , TransducerStep (..)
-  , transducer
-  , expectedGen
-  , languageGen
-  , unreachableGen
   ) where
 
 import Control.Lens
@@ -42,6 +40,20 @@ import Data.Set (Set)
 class Matching word pattern | pattern -> word where
   (=~) :: word -> pattern -> Bool
   infix 2 =~
+-- instances
+instance Categorized token
+  => Matching [token] (Transducer token) where
+    word =~ et = acceptsChart n chart
+      where
+        (n, chart) = prefixRun et word
+instance Categorized token
+  => Matching [token] (Bnf (RegEx token)) where
+    word =~ bnf = word =~ transducer bnf
+instance Categorized token
+  => Matching [token] (RegEx token) where
+    word =~ pattern = word =~ liftBnf0 pattern
+instance Matching s (APrism s t a b) where
+  word =~ pattern = is pattern word
 
 {-| A `Transducer` is a tuple
 
@@ -74,7 +86,14 @@ data TransducerStep token
 {- | Compile a `RegEx`tended `Bnf` into a `Transducer`,
 using a combination of Thompson's algorithm for regular expressions
 and Earley's algorithm for context-free grammars. See Jim & Mandelbaum,
-[Efficient Earley Parsing with Regular Right-hand Sides](http://trevorjim.com/papers/ldta-2009.pdf).
+[Efficient Earley Parsing with Regular Right-hand Sides]
+(http://trevorjim.com/papers/ldta-2009.pdf),
+and McIlroy, [Enumerating the strings of regular languages]
+(https://www.cs.dartmouth.edu/~doug/nfa.pdf).
+
+A transducer is a form of [finite state machine]
+(https://www.scribd.com/doc/76189520/John-H-Conway-Regular-Algebra-and-Finite-Machines)
+that can be run in various ways like `=~`, `expectedRun`, `languageRun` & `unreachableRun`.
 -}
 transducer :: Bnf (RegEx token) -> Transducer token
 transducer (Bnf start rules) = Transducer
@@ -206,12 +225,12 @@ transducer (Bnf start rules) = Transducer
             , bypass0 || bypass1
             )
 
-prefixGen
+prefixRun
   :: Categorized token
   => Transducer token
   -> [token]
   -> (Int, IntMap (IntMap IntSet))
-prefixGen et word = go 0 (initialChart et) word
+prefixRun et word = go 0 (initialChart et) word
   where
     go j chart [] = (j, chart)
     go j chart (x : xs) =
@@ -229,27 +248,25 @@ prefixGen et word = go 0 (initialChart et) word
           _ -> acc
 
 {- | What token is expected next?
-The scanner frontier, `expectedGen` returns `TokenClass`
+The scanner frontier, `expectedRun` returns the `TokenClass`
 that can be scanned next after the given input prefix.
 A `falseB` result means the current chart has no scanner transitions,
 i.e. the prefix is a dead end for recognition.
 -}
-expectedGen
+expectedRun
   :: Categorized token
   => Transducer token -> [token] {- ^ prefix -} -> TokenClass token
-expectedGen et word = anyB fst (scanClassOptions et n chart)
+expectedRun et word = anyB fst (scanClassOptions et n chart)
   where
-    (n, chart) = prefixGen et word
+    (n, chart) = prefixRun et word
 
 {- |
-Rule names declared in the `Bnf` that can never be entered from the start
+Rule names that can never be entered from the start
 expression — dead productions. A non-empty result is a grammar-hygiene
 warning: those rules can be deleted without changing the recognized language.
-
-Operationally, this is reachability over control states plus nonterminal calls.
 -}
-unreachableGen :: Transducer token -> Set String
-unreachableGen et =
+unreachableRun :: Transducer token -> Set String
+unreachableRun et =
   Map.keysSet (transducerRules et) `Set.difference` called
   where
     called = bfs (transducerStarts et) IntSet.empty Set.empty
@@ -268,34 +285,18 @@ unreachableGen et =
         in (acc <> ds <> firsts, Set.insert name cs)
       Just (EmitNonTerminal _) -> (acc, cs)
       Nothing -> (acc, cs)
--- instances
-instance Categorized token
-  => Matching [token] (Bnf (RegEx token)) where
-    word =~ bnf = acceptsChart n chart
-      where
-        et = transducer bnf
-        (n, chart) = prefixGen et word
-instance Categorized token
-  => Matching [token] (RegEx token) where
-    word =~ pattern = word =~ liftBnf0 pattern
-instance Matching s (APrism s t a b) where
-  word =~ pattern = is pattern word
 
 {- |
-Generate words recognized by a grammar machine using chart progression.
-
-The algorithm performs a breadth-first exploration over scanner frontiers derived
-from Earley sets, so words are produced by nondecreasing length.
-
-Chart/state progression is deterministic (state id order). Token realization uses
-`TokenAlgebra` and may be nondeterministic, but is always valid for the chosen
-terminal class.
+`languageRun` lazily produces all words in a language from shortest to longest.
+However since `TokenClass`es can resolve to infinite sets of tokens,
+and the relevant case of `Char` tokens while not infinite is huge,
+it samples tokens in an `Applicative` `TokenAlgebra`.
 -}
-languageGen
+languageRun
   :: (Applicative f, TokenAlgebra token (f token))
-  => Transducer token
+  => Transducer token -- ^ transducer
   -> f [[token]]
-languageGen et = sequenceA (fmap sampleWord classWords)
+languageRun et = sequenceA (fmap sampleWord classWords)
   where
 
     classWords = enumerateByLength [(0, [], initialChart et)] Set.empty

From 5c2ade7dff3782a4c49cbf75150aada681d8a5e2 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 14:56:25 -0700
Subject: [PATCH 18/31] cleaning

---
 src/Control/Lens/Grammar.hs         | 7 ++++++-
 src/Control/Lens/Grammar/Machine.hs | 3 +--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index 5b2b10e4..4eac34df 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -793,11 +793,16 @@ It can apply to a `RegGrammar`.
 regbnfG :: Grammar Char a -> RegBnf
 regbnfG bnf = runGrammor bnf
 
+{- | Compile a `Grammar` into a `Transducer`.
+
+A transducer is a form of finite state machine
+that can be run in various ways like `=~`, `expectedRun`, `languageRun` & `unreachableRun`.
+-}
 transducerG :: Categorized token => Grammar token a -> Transducer token
 transducerG bnf = transducer (runGrammor bnf)
 
 languageG :: (Applicative f, TokenAlgebra token (f token)) => Grammar token a -> f [[token]]
-languageG bnf = languageRun (transducer (runGrammor bnf))
+languageG bnf = languageRun (transducerG bnf)
 
 {- | `printG` generates a printer from a `CtxGrammar`.
 Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s,
diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index 8f44f1f1..be5476c8 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -91,8 +91,7 @@ and Earley's algorithm for context-free grammars. See Jim & Mandelbaum,
 and McIlroy, [Enumerating the strings of regular languages]
 (https://www.cs.dartmouth.edu/~doug/nfa.pdf).
 
-A transducer is a form of [finite state machine]
-(https://www.scribd.com/doc/76189520/John-H-Conway-Regular-Algebra-and-Finite-Machines)
+A transducer is a form of finite state machine
 that can be run in various ways like `=~`, `expectedRun`, `languageRun` & `unreachableRun`.
 -}
 transducer :: Bnf (RegEx token) -> Transducer token

From 23559716d167e18a055346ab915f36671ee7d8b1 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 15:06:42 -0700
Subject: [PATCH 19/31] Update Machine.hs

---
 src/Control/Lens/Grammar/Machine.hs | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index be5476c8..0673a924 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -100,7 +100,7 @@ transducer (Bnf start rules) = Transducer
   , transducerRules = Map.fromList
       [ ( n
         , ( Map.findWithDefault IntSet.empty n firstsMap
-          , Map.findWithDefault False n nullMap
+          , Set.member n nullSet
           )
         )
       | n <- Map.keys ruleMap
@@ -115,7 +115,7 @@ transducer (Bnf start rules) = Transducer
 
     rexNullable nm = \case
       SeqEmpty -> True
-      NonTerminal n -> Map.findWithDefault False n nm
+      NonTerminal n -> Set.member n nm
       Sequence x y -> rexNullable nm x && rexNullable nm y
       KleeneStar _ -> True
       KleeneOpt _ -> True
@@ -124,14 +124,17 @@ transducer (Bnf start rules) = Transducer
       RegExam (OneOf _) -> False
       RegExam (NotOneOf _ _) -> False
 
-    iterNull nm =
-      let nm' = Map.mapWithKey
-            (\n _ -> any (rexNullable nm) (Map.findWithDefault [] n ruleMap)) nm
-      in if nm == nm' then nm else iterNull nm'
+    ruleNames = Map.keys ruleMap
 
-    nullMap = iterNull (Map.map (const False) ruleMap)
+    iterNull ns =
+      let ns' = Set.fromList
+            [ n
+            | n <- ruleNames
+            , any (rexNullable ns) (Map.findWithDefault [] n ruleMap)
+            ]
+      in if ns == ns' then ns else iterNull ns'
 
-    ruleNames = Map.keys ruleMap
+    nullSet = iterNull Set.empty
 
     transducerAcceptId0 = 0
 
@@ -171,7 +174,7 @@ transducer (Bnf start rules) = Transducer
           ( IntSet.singleton nextId
           , [(nextId, TransitionNonTerminal name dests)]
           , nextId + 1
-          , Map.findWithDefault False name nullMap
+          , Set.member name nullSet
           )
         Sequence rex0 rex1 ->
           let

From 05f77056641c7ebda4319dea4e7be6ab368bc943 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 15:20:59 -0700
Subject: [PATCH 20/31] lazy state

---
 src/Control/Lens/Grammar/Kleene.hs | 2 +-
 src/Control/Lens/Grammar/Token.hs  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Control/Lens/Grammar/Kleene.hs b/src/Control/Lens/Grammar/Kleene.hs
index 03e8d739..85c54c1f 100644
--- a/src/Control/Lens/Grammar/Kleene.hs
+++ b/src/Control/Lens/Grammar/Kleene.hs
@@ -30,7 +30,7 @@ import Control.Lens.Grammar.Boole
 import Control.Lens.Grammar.Symbol
 import Control.Lens.Grammar.Token
 import Control.Monad.Loops
-import Control.Monad.Trans.State.Strict (StateT, state)
+import Control.Monad.State (StateT, state)
 import Data.Bifunctor.Joker
 import Data.Foldable
 import Data.MemoTrie
diff --git a/src/Control/Lens/Grammar/Token.hs b/src/Control/Lens/Grammar/Token.hs
index 1bede35c..5ad21da5 100644
--- a/src/Control/Lens/Grammar/Token.hs
+++ b/src/Control/Lens/Grammar/Token.hs
@@ -27,7 +27,7 @@ import Data.Foldable
 import Data.Profunctor
 import Data.Profunctor.Monoidal
 import Data.Word
-import Control.Monad.Trans.State.Strict (StateT, state)
+import Control.Monad.State (StateT, state)
 import System.Random (RandomGen, Random, random, randomR)
 import Test.QuickCheck.Arbitrary
 import Test.QuickCheck.Gen (Gen)

From ac3d8172cf72163e1513283c8fa7886414184b2d Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 15:22:16 -0700
Subject: [PATCH 21/31] Update Grammar.hs

---
 src/Control/Lens/Grammar.hs | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index 4eac34df..11b2ad0a 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -801,7 +801,28 @@ that can be run in various ways like `=~`, `expectedRun`, `languageRun` & `unrea
 transducerG :: Categorized token => Grammar token a -> Transducer token
 transducerG bnf = transducer (runGrammor bnf)
 
-languageG :: (Applicative f, TokenAlgebra token (f token)) => Grammar token a -> f [[token]]
+{- |
+`languageG` lazily produces all words in a language from shortest to longest.
+However since `TokenClass`es can resolve to infinite sets of tokens,
+and the relevant case of `Char` tokens while not infinite is huge,
+it samples tokens in an `Applicative` `TokenAlgebra`.
+
+>>> import Control.Monad.State
+>>> import System.Random
+>>> let gen = mkStdGen 69
+>>> evalState (take 10 <$> languageG @Char regexGrammar) gen
+["","|","\776269","()","[]","\\[","||","|\249908","\770923*","\1008821+"]
+
+This is useful for generating valid language examples for property tests.
+
+>>> import Test.QuickCheck
+>>> let rg = regbnfG regexGrammar
+>>> words100 <- generate (take 100 <$> languageG @Char regexGrammar)
+>>> quickCheckWith stdArgs {maxSuccess = 1} (property (all (=~ rg) words100))
++++ OK, passed 1 test.
+
+-}
+languageG :: (TokenAlgebra token (f token), Applicative f) => Grammar token a -> f [[token]]
 languageG bnf = languageRun (transducerG bnf)
 
 {- | `printG` generates a printer from a `CtxGrammar`.

From cc2332573868f3b27badceb79c02338397f83c06 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 15:38:42 -0700
Subject: [PATCH 22/31] cache optimization

For each Earley set E_i with i < j, build a caller index (keyed by nonterminal name) once and reuse it across all EmitNonTerminal completions that share the same origin i, avoiding repeated linear scans of E_i per completion.
---
 src/Control/Lens/Grammar/Machine.hs | 41 ++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index 0673a924..ea14d1ac 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -375,15 +375,21 @@ closeChartAt
   -> Int
   -> IntMap (IntMap IntSet)
   -> IntMap (IntMap IntSet)
-closeChartAt et j initialChart0 = loop initialWork initialChart0
+closeChartAt et j initialChart0 = loop initialWork initialChart0 IntMap.empty
   where
     initialEJ = IntMap.findWithDefault IntMap.empty j initialChart0
     initialWork =
       [ (s, i) | (s, os) <- IntMap.toList initialEJ, i <- IntSet.toList os ]
 
+    -- For fixed i < j, E_i does not change while closing E_j. Cache an index
+    -- from nonterminal name to caller origins/continuations to speed completion.
+    -- IntMap key: origin index i
+    -- Map key: nonterminal name
+    -- Value: list of (caller origins, continuation destinations)
+
     -- Earley closure at E_j: apply predict/complete until fixed point.
-    loop [] chart = chart
-    loop ((s, i) : rest) chart = case IntMap.lookup s (transducerRelations et) of
+    loop [] chart _ = chart
+    loop ((s, i) : rest) chart callerCache = case IntMap.lookup s (transducerRelations et) of
       Just (TransitionNonTerminal name ds) ->
         let
           (firsts, isNull) = Map.findWithDefault
@@ -392,21 +398,36 @@ closeChartAt et j initialChart0 = loop initialWork initialChart0
           nullItems =
             if isNull then [(d, i) | d <- IntSet.toList ds] else []
           (chart', new) = addEarleyItems (predItems <> nullItems) chart
-        in loop (new <> rest) chart'
+        in loop (new <> rest) chart' callerCache
       Just (EmitNonTerminal name) ->
         let
-          eI = IntMap.findWithDefault IntMap.empty i chart
+          (indexed, callerCache') = callerEntries i chart callerCache
+          callerRows = Map.findWithDefault [] name indexed
           completions =
             [ (d, i')
-            | (t, os) <- IntMap.toList eI
-            , Just (TransitionNonTerminal n' ds) <- [IntMap.lookup t (transducerRelations et)]
-            , n' == name
+            | (os, ds) <- callerRows
             , i' <- IntSet.toList os
             , d <- IntSet.toList ds
             ]
           (chart', new) = addEarleyItems completions chart
-        in loop (new <> rest) chart'
-      _ -> loop rest chart
+        in loop (new <> rest) chart' callerCache'
+      _ -> loop rest chart callerCache
+
+    callerEntries i chart callerCache
+      -- E_j mutates during closure, so do not cache index for i == j.
+      | i == j = (buildCallerIndex (IntMap.findWithDefault IntMap.empty i chart), callerCache)
+      | otherwise = case IntMap.lookup i callerCache of
+          Just indexed -> (indexed, callerCache)
+          Nothing ->
+            let indexed = buildCallerIndex (IntMap.findWithDefault IntMap.empty i chart)
+            in (indexed, IntMap.insert i indexed callerCache)
+
+    buildCallerIndex eI = IntMap.foldrWithKey step Map.empty eI
+      where
+        step t os acc = case IntMap.lookup t (transducerRelations et) of
+          Just (TransitionNonTerminal n ds) ->
+            Map.insertWith (++) n [(os, ds)] acc
+          _ -> acc
 
     addEarleyItems items chart = foldl' ins (chart, []) items
       where

From 1bfe148bd73aa79f569ebc4fd1f382c54766c29f Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 16:06:29 -0700
Subject: [PATCH 23/31] Update Machine.hs

---
 src/Control/Lens/Grammar/Machine.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index ea14d1ac..f31e8cf9 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -401,8 +401,8 @@ closeChartAt et j initialChart0 = loop initialWork initialChart0 IntMap.empty
         in loop (new <> rest) chart' callerCache
       Just (EmitNonTerminal name) ->
         let
-          (indexed, callerCache') = callerEntries i chart callerCache
-          callerRows = Map.findWithDefault [] name indexed
+          (ixed, callerCache') = callerEntries i chart callerCache
+          callerRows = Map.findWithDefault [] name ixed
           completions =
             [ (d, i')
             | (os, ds) <- callerRows
@@ -417,10 +417,10 @@ closeChartAt et j initialChart0 = loop initialWork initialChart0 IntMap.empty
       -- E_j mutates during closure, so do not cache index for i == j.
       | i == j = (buildCallerIndex (IntMap.findWithDefault IntMap.empty i chart), callerCache)
       | otherwise = case IntMap.lookup i callerCache of
-          Just indexed -> (indexed, callerCache)
+          Just ixed -> (ixed, callerCache)
           Nothing ->
-            let indexed = buildCallerIndex (IntMap.findWithDefault IntMap.empty i chart)
-            in (indexed, IntMap.insert i indexed callerCache)
+            let ixed = buildCallerIndex (IntMap.findWithDefault IntMap.empty i chart)
+            in (ixed, IntMap.insert i ixed callerCache)
 
     buildCallerIndex eI = IntMap.foldrWithKey step Map.empty eI
       where

From d6b8d76ff0aaa3dc333f393ff1ae14b3696ce1fb Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 16:35:53 -0700
Subject: [PATCH 24/31] parseForestRun

---
 src/Control/Lens/Grammar/Machine.hs | 116 +++++++++++++++++++++++++++-
 test/Main.hs                        |  24 ++++++
 2 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index f31e8cf9..89fe0e2d 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -13,6 +13,7 @@ module Control.Lens.Grammar.Machine
     Matching (..)
     -- * Transducer
   , transducer
+  , parseForestRun
   , languageRun
   , expectedRun
   , unreachableRun
@@ -35,6 +36,7 @@ import qualified Data.Map.Strict as Map
 import Data.Map.Strict (Map)
 import qualified Data.Set as Set
 import Data.Set (Set)
+import Data.Tree (Tree (..))
 
 -- | Does a word match a pattern?
 class Matching word pattern | pattern -> word where
@@ -45,7 +47,7 @@ instance Categorized token
   => Matching [token] (Transducer token) where
     word =~ et = acceptsChart n chart
       where
-        (n, chart) = prefixRun et word
+        (n, chart) = prefixGen et word
 instance Categorized token
   => Matching [token] (Bnf (RegEx token)) where
     word =~ bnf = word =~ transducer bnf
@@ -227,12 +229,118 @@ transducer (Bnf start rules) = Transducer
             , bypass0 || bypass1
             )
 
-prefixRun
+parseForestRun
+  :: Categorized token
+  => Transducer token
+  -> [token]
+  -> ([Tree (String, Int, Int, [token])], [token]) -- ^ parse forest spans & remaining unparsed tokens
+parseForestRun et word = (concat (itemForests Set.empty Nothing 0 acceptedLen 0), drop acceptedLen word)
+  where
+    (n, chart) = prefixGen et word
+    relations = transducerRelations et
+    acceptedLen = maximum [j | j <- [0 .. n], acceptsChart j chart]
+
+    acceptedWord = take acceptedLen word
+    sliceAt start end = take (end - start) (drop start acceptedWord)
+    itemsAt j = IntMap.findWithDefault IntMap.empty j chart
+    ruleInfo name = Map.findWithDefault (IntSet.empty, False) name (transducerRules et)
+
+    edgesAt :: IntMap (IntMap [edge]) -> Int -> Int -> [edge]
+    edgesAt table pos stateId =
+      IntMap.findWithDefault [] stateId (IntMap.findWithDefault IntMap.empty pos table)
+
+    insertEdges :: edge -> IntSet -> IntMap [edge] -> IntMap [edge]
+    insertEdges edge dests acc = IntSet.foldr
+      (\stateId m -> IntMap.insertWith (++) stateId [edge] m)
+      acc
+      dests
+
+    scanBack = IntMap.fromList
+      [ (end, backRow (end - 1) input)
+      | (end, input) <- zip [1 .. acceptedLen] acceptedWord
+      ]
+      where
+        backRow prev input = IntMap.foldrWithKey step IntMap.empty (itemsAt prev)
+          where
+            step prevState origins acc = case IntMap.lookup prevState relations of
+              Just (TransitionTokenClass cls dests) | tokenClass cls input ->
+                insertEdges (prevState, origins) dests acc
+              _ -> acc
+
+    completeBack = IntMap.fromList
+      [ (split, IntMap.foldrWithKey step IntMap.empty (itemsAt split))
+      | split <- [0 .. acceptedLen]
+      ]
+      where
+        step caller origins acc = case IntMap.lookup caller relations of
+          Just (TransitionNonTerminal name dests) ->
+            insertEdges (caller, origins, name) dests acc
+          _ -> acc
+
+    ruleFinals = IntMap.foldrWithKey finalStates Map.empty relations
+    finalStates stateId step acc = case step of
+      EmitNonTerminal name -> Map.insert name stateId acc
+      _ -> acc
+
+    entryStates Nothing = transducerStarts et
+    entryStates (Just name) = fst (ruleInfo name)
+
+    ruleNullable = snd . ruleInfo
+
+    itemForests guards entry origin end stateId
+      | Set.member itemKey guards = []
+      | otherwise = baseForests <> scannedForests <> completedForests
+      where
+        itemKey = Left (entry, origin, end, stateId)
+        guards' = Set.insert itemKey guards
+
+        baseForests
+          | end == origin && IntSet.member stateId (entryStates entry) = [[]]
+          | otherwise = []
+
+        scannedForests
+          | end <= origin = []
+          | otherwise =
+              [ forest
+              | (prevState, origins) <- edgesAt scanBack end stateId
+              , IntSet.member origin origins
+              , let prev = end - 1
+              , forest <- itemForests guards' entry origin prev prevState
+              ]
+
+        completedForests =
+          [ prefix <> [subtree]
+          | split <- [origin .. end]
+          , (caller, origins, name) <- edgesAt completeBack split stateId
+          , IntSet.member origin origins
+          , prefix <- itemForests guards' entry origin split caller
+          , subtree <- ruleTrees guards' name split end
+          ]
+
+    ruleTrees guards name start end
+      | Set.member ruleKey guards = []
+      | otherwise = nullableTrees <> derivedTrees
+      where
+        ruleKey = Right (name, start, end)
+        guards' = Set.insert ruleKey guards
+
+        nullableTrees
+          | start == end && ruleNullable name = [Node (name, start, end, []) []]
+          | otherwise = []
+
+        derivedTrees = case Map.lookup name ruleFinals of
+          Nothing -> []
+          Just finalState ->
+            [ Node (name, start, end, sliceAt start end) subtrees
+            | subtrees <- itemForests guards' (Just name) start end finalState
+            ]
+
+prefixGen
   :: Categorized token
   => Transducer token
   -> [token]
   -> (Int, IntMap (IntMap IntSet))
-prefixRun et word = go 0 (initialChart et) word
+prefixGen et word = go 0 (initialChart et) word
   where
     go j chart [] = (j, chart)
     go j chart (x : xs) =
@@ -260,7 +368,7 @@ expectedRun
   => Transducer token -> [token] {- ^ prefix -} -> TokenClass token
 expectedRun et word = anyB fst (scanClassOptions et n chart)
   where
-    (n, chart) = prefixRun et word
+    (n, chart) = prefixGen et word
 
 {- |
 Rule names that can never be entered from the start
diff --git a/test/Main.hs b/test/Main.hs
index 610e5363..cdb2ca76 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -8,6 +8,7 @@ import Data.Function (fix)
 import Data.List (genericLength)
 import Data.Maybe (isJust)
 import Data.Profunctor.Types (Star (..))
+import Data.Tree (Tree (..))
 import System.Environment (lookupEnv)
 import Test.DocTest
 import Test.Hspec
@@ -40,10 +41,33 @@ main = do
     describe "lambdaGrammar" $ testCfg True lambdaExamples lambdaGrammar
     describe "lenvecGrammar" $ testCsg True lenvecExamples lenvecGrammar
     describe "chainGrammar" $ testCfg True chainExamples chainGrammar
+    describe "parseForestRun" parseForestRunTests
     describe "Parsector try rollback" tryRollbackTests
     describe "Kleene" kleeneProperties
     describe "meander" meanderProperties
 
+parseForestRunTests :: Spec
+parseForestRunTests = do
+  it "returns the nested rule forest for a full parse" $ do
+    let (actualForest, actualRest) = parseForestRun (transducerG arithGrammar) "2*3+4;;;"
+    actualForest `shouldBe`
+      [ Node ("arith", 0, 5, "2*3+4")
+          [ Node ("sum", 0, 5, "2*3+4")
+              [ Node ("product", 0, 3, "2*3")
+                  [ Node ("factor", 0, 1, "2")
+                      [Node ("number", 0, 1, "2") []]
+                  , Node ("factor", 2, 3, "3")
+                      [Node ("number", 2, 3, "3") []]
+                  ]
+              , Node ("product", 4, 5, "4")
+                  [ Node ("factor", 4, 5, "4")
+                      [Node ("number", 4, 5, "4") []]
+                  ]
+              ]
+          ]
+      ]
+    actualRest `shouldBe` ";;;"
+
 tryRollbackTests :: Spec
 tryRollbackTests = do
   it "rolls back parse stream/offset on failed try" $ do

From 2bc2aa89b4e9c32c83fa34e02d6b01437fec5b52 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 17:06:40 -0700
Subject: [PATCH 25/31] Update Machine.hs

---
 src/Control/Lens/Grammar/Machine.hs | 34 +++++++++++++++--------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index 89fe0e2d..20251421 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -13,10 +13,10 @@ module Control.Lens.Grammar.Machine
     Matching (..)
     -- * Transducer
   , transducer
-  , parseForestRun
-  , languageRun
-  , expectedRun
-  , unreachableRun
+  , parseForestGen
+  , languageGen
+  , expectedGen
+  , unreachableGen
   , Transducer (..)
   , TransducerStep (..)
   ) where
@@ -94,7 +94,8 @@ and McIlroy, [Enumerating the strings of regular languages]
 (https://www.cs.dartmouth.edu/~doug/nfa.pdf).
 
 A transducer is a form of finite state machine
-that can be run in various ways like `=~`, `expectedRun`, `languageRun` & `unreachableRun`.
+that can be run in various ways like
+`=~`, `expectedGen`, `languageGen`, `parseForestGen` & `unreachableGen`.
 -}
 transducer :: Bnf (RegEx token) -> Transducer token
 transducer (Bnf start rules) = Transducer
@@ -229,12 +230,13 @@ transducer (Bnf start rules) = Transducer
             , bypass0 || bypass1
             )
 
-parseForestRun
+parseForestGen
   :: Categorized token
   => Transducer token
   -> [token]
-  -> ([Tree (String, Int, Int, [token])], [token]) -- ^ parse forest spans & remaining unparsed tokens
-parseForestRun et word = (concat (itemForests Set.empty Nothing 0 acceptedLen 0), drop acceptedLen word)
+  -> ([Tree (String, Int, Int, [token])], [token])
+  {- ^ parse forest & remaining unparsed tokens -}
+parseForestGen et word = (concat (itemForests Set.empty Nothing 0 acceptedLen 0), drop acceptedLen word)
   where
     (n, chart) = prefixGen et word
     relations = transducerRelations et
@@ -358,15 +360,15 @@ prefixGen et word = go 0 (initialChart et) word
           _ -> acc
 
 {- | What token is expected next?
-The scanner frontier, `expectedRun` returns the `TokenClass`
+The scanner frontier, `expectedGen` returns the `TokenClass`
 that can be scanned next after the given input prefix.
 A `falseB` result means the current chart has no scanner transitions,
 i.e. the prefix is a dead end for recognition.
 -}
-expectedRun
+expectedGen
   :: Categorized token
   => Transducer token -> [token] {- ^ prefix -} -> TokenClass token
-expectedRun et word = anyB fst (scanClassOptions et n chart)
+expectedGen et word = anyB fst (scanClassOptions et n chart)
   where
     (n, chart) = prefixGen et word
 
@@ -375,8 +377,8 @@ Rule names that can never be entered from the start
 expression — dead productions. A non-empty result is a grammar-hygiene
 warning: those rules can be deleted without changing the recognized language.
 -}
-unreachableRun :: Transducer token -> Set String
-unreachableRun et =
+unreachableGen :: Transducer token -> Set String
+unreachableGen et =
   Map.keysSet (transducerRules et) `Set.difference` called
   where
     called = bfs (transducerStarts et) IntSet.empty Set.empty
@@ -397,16 +399,16 @@ unreachableRun et =
       Nothing -> (acc, cs)
 
 {- |
-`languageRun` lazily produces all words in a language from shortest to longest.
+`languageGen` lazily produces all words in a language from shortest to longest.
 However since `TokenClass`es can resolve to infinite sets of tokens,
 and the relevant case of `Char` tokens while not infinite is huge,
 it samples tokens in an `Applicative` `TokenAlgebra`.
 -}
-languageRun
+languageGen
   :: (Applicative f, TokenAlgebra token (f token))
   => Transducer token -- ^ transducer
   -> f [[token]]
-languageRun et = sequenceA (fmap sampleWord classWords)
+languageGen et = sequenceA (fmap sampleWord classWords)
   where
 
     classWords = enumerateByLength [(0, [], initialChart et)] Set.empty

From 9a18679685d8832040a6aa28ac190e25a2c4cd8c Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 17:07:03 -0700
Subject: [PATCH 26/31] Update Main.hs

---
 test/Main.hs | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/test/Main.hs b/test/Main.hs
index cdb2ca76..57abd412 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -41,15 +41,15 @@ main = do
     describe "lambdaGrammar" $ testCfg True lambdaExamples lambdaGrammar
     describe "lenvecGrammar" $ testCsg True lenvecExamples lenvecGrammar
     describe "chainGrammar" $ testCfg True chainExamples chainGrammar
-    describe "parseForestRun" parseForestRunTests
+    describe "parseForestGen" parseForestGenTests
     describe "Parsector try rollback" tryRollbackTests
     describe "Kleene" kleeneProperties
     describe "meander" meanderProperties
 
-parseForestRunTests :: Spec
-parseForestRunTests = do
+parseForestGenTests :: Spec
+parseForestGenTests = do
   it "returns the nested rule forest for a full parse" $ do
-    let (actualForest, actualRest) = parseForestRun (transducerG arithGrammar) "2*3+4;;;"
+    let (actualForest, actualRest) = parseForestGen (transducerG arithGrammar) "2*3+4;;;"
     actualForest `shouldBe`
       [ Node ("arith", 0, 5, "2*3+4")
           [ Node ("sum", 0, 5, "2*3+4")
@@ -85,9 +85,22 @@ tryRollbackTests = do
 
 doctests :: IO ()
 doctests = do
+  stackExe <- lookupEnv "STACK_EXE"
+  ghcEnvironment <- lookupEnv "GHC_ENVIRONMENT"
   let
     modulePaths =
       [ "src/Control/Lens/Grammar.hs" ]
+    sourceDirs =
+      [ "-isrc"
+      , "-itest"
+      ]
+    packageEnvFlags = case ghcEnvironment of
+      Just "-" -> []
+      Just path -> ["-package-env=" <> path]
+      Nothing -> []
+    runnerFlags
+      | isJust stackExe = []
+      | otherwise = sourceDirs <> packageEnvFlags
     languageExtensions =
       [ "-XAllowAmbiguousTypes"
       , "-XArrows"
@@ -129,7 +142,7 @@ doctests = do
   for_ modulePaths $ \modulePath -> do
     putStr "Testing module documentation in "
     putStrLn modulePath
-    doctest (modulePath : languageExtensions)
+    doctest (modulePath : runnerFlags <> languageExtensions)
 
 meanderProperties :: Spec
 meanderProperties =

From 8dbaf4e612316d5b2e3b9ba6f2f69103b343e974 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 17:40:07 -0700
Subject: [PATCH 27/31] docs

---
 src/Control/Lens/Grammar.hs         | 78 ++++++++++++++++++++---------
 src/Control/Lens/Grammar/Machine.hs | 26 +++++-----
 test/Main.hs                        |  2 +-
 3 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index 11b2ad0a..b442dc90 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -25,7 +25,6 @@ module Control.Lens.Grammar
   , regbnfGrammar
   , applicativeG
   , transducerG
-  , languageG
     -- * Context-sensitive grammar
   , CtxGrammar
   , printG
@@ -795,35 +794,68 @@ regbnfG bnf = runGrammor bnf
 
 {- | Compile a `Grammar` into a `Transducer`.
 
-A transducer is a form of finite state machine
-that can be run in various ways like `=~`, `expectedRun`, `languageRun` & `unreachableRun`.
--}
-transducerG :: Categorized token => Grammar token a -> Transducer token
-transducerG bnf = transducer (runGrammor bnf)
+>>> let regexMachine = transducerG @Char regexGrammar
 
-{- |
-`languageG` lazily produces all words in a language from shortest to longest.
-However since `TokenClass`es can resolve to infinite sets of tokens,
-and the relevant case of `Char` tokens while not infinite is huge,
-it samples tokens in an `Applicative` `TokenAlgebra`.
+A transducer is a form of finite state machine,
+usable as an intermediary for further generators like
+`=~`, `expectNext`, `languageSample`, `parseForestGen` & `unreachableRules`.
 
+>>> import Test.QuickCheck
+>>> let regexLang = languageSample @Char regexMachine
+>>> words100 <- generate (take 100 <$> regexLang)
+>>> quickCheck (property (all (=~ regexMachine) words100))
++++ OK, passed 1 test.
 >>> import Control.Monad.State
 >>> import System.Random
 >>> let gen = mkStdGen 69
->>> evalState (take 10 <$> languageG @Char regexGrammar) gen
-["","|","\776269","()","[]","\\[","||","|\249908","\770923*","\1008821+"]
-
-This is useful for generating valid language examples for property tests.
-
->>> import Test.QuickCheck
->>> let rg = regbnfG regexGrammar
->>> words100 <- generate (take 100 <$> languageG @Char regexGrammar)
->>> quickCheckWith stdArgs {maxSuccess = 1} (property (all (=~ rg) words100))
-+++ OK, passed 1 test.
+>>> evalState (take 15 <$> regexLang) gen
+["","|","\776269","()","[]","\\[","||","|\249908","\770923*","\1008821+","\318904?","\845807|","\477898\1026934","()*","()+"]
+
+>>> import Data.Tree (drawForest)
+
+@>>> let (forest, _) = parseForestGen regexMachine "xy|z" in putStr (drawForest (map (fmap show) forest))
+("regex",0,4,"xy|z")
+|
+`- ("alternate",0,4,"xy|z")
+   |
+   +- ("sequence",0,2,"xy")
+   |  |
+   |  +- ("expression",0,1,"x")
+   |  |  |
+   |  |  `- ("atom",0,1,"x")
+   |  |     |
+   |  |     `- ("class",0,1,"x")
+   |  |        |
+   |  |        `- ("class-one-of",0,1,"x")
+   |  |           |
+   |  |           `- ("char",0,1,"x")
+   |  |
+   |  `- ("expression",1,2,"y")
+   |     |
+   |     `- ("atom",1,2,"y")
+   |        |
+   |        `- ("class",1,2,"y")
+   |           |
+   |           `- ("class-one-of",1,2,"y")
+   |              |
+   |              `- ("char",1,2,"y")
+   |
+   `- ("sequence",3,4,"z")
+      |
+      `- ("expression",3,4,"z")
+         |
+         `- ("atom",3,4,"z")
+            |
+            `- ("class",3,4,"z")
+               |
+               `- ("class-one-of",3,4,"z")
+                  |
+                  `- ("char",3,4,"z")
+@
 
 -}
-languageG :: (TokenAlgebra token (f token), Applicative f) => Grammar token a -> f [[token]]
-languageG bnf = languageRun (transducerG bnf)
+transducerG :: Categorized token => Grammar token a -> Transducer token
+transducerG bnf = transducer (runGrammor bnf)
 
 {- | `printG` generates a printer from a `CtxGrammar`.
 Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s,
diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index 20251421..f3aa9989 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -14,9 +14,9 @@ module Control.Lens.Grammar.Machine
     -- * Transducer
   , transducer
   , parseForestGen
-  , languageGen
-  , expectedGen
-  , unreachableGen
+  , languageSample
+  , expectNext
+  , unreachableRules
   , Transducer (..)
   , TransducerStep (..)
   ) where
@@ -95,7 +95,7 @@ and McIlroy, [Enumerating the strings of regular languages]
 
 A transducer is a form of finite state machine
 that can be run in various ways like
-`=~`, `expectedGen`, `languageGen`, `parseForestGen` & `unreachableGen`.
+`=~`, `expectNext`, `languageSample`, `parseForestGen` & `unreachableRules`.
 -}
 transducer :: Bnf (RegEx token) -> Transducer token
 transducer (Bnf start rules) = Transducer
@@ -360,15 +360,15 @@ prefixGen et word = go 0 (initialChart et) word
           _ -> acc
 
 {- | What token is expected next?
-The scanner frontier, `expectedGen` returns the `TokenClass`
+The scanner frontier, `expectNext` returns the `TokenClass`
 that can be scanned next after the given input prefix.
 A `falseB` result means the current chart has no scanner transitions,
 i.e. the prefix is a dead end for recognition.
 -}
-expectedGen
+expectNext
   :: Categorized token
   => Transducer token -> [token] {- ^ prefix -} -> TokenClass token
-expectedGen et word = anyB fst (scanClassOptions et n chart)
+expectNext et word = anyB fst (scanClassOptions et n chart)
   where
     (n, chart) = prefixGen et word
 
@@ -377,8 +377,8 @@ Rule names that can never be entered from the start
 expression — dead productions. A non-empty result is a grammar-hygiene
 warning: those rules can be deleted without changing the recognized language.
 -}
-unreachableGen :: Transducer token -> Set String
-unreachableGen et =
+unreachableRules :: Transducer token -> Set String
+unreachableRules et =
   Map.keysSet (transducerRules et) `Set.difference` called
   where
     called = bfs (transducerStarts et) IntSet.empty Set.empty
@@ -399,16 +399,16 @@ unreachableGen et =
       Nothing -> (acc, cs)
 
 {- |
-`languageGen` lazily produces all words in a language from shortest to longest.
+`languageSample` lazily produces all words in a language from shortest to longest.
 However since `TokenClass`es can resolve to infinite sets of tokens,
 and the relevant case of `Char` tokens while not infinite is huge,
 it samples tokens in an `Applicative` `TokenAlgebra`.
 -}
-languageGen
-  :: (Applicative f, TokenAlgebra token (f token))
+languageSample
+  :: (TokenAlgebra token (f token), Applicative f)
   => Transducer token -- ^ transducer
   -> f [[token]]
-languageGen et = sequenceA (fmap sampleWord classWords)
+languageSample et = sequenceA (fmap sampleWord classWords)
   where
 
     classWords = enumerateByLength [(0, [], initialChart et)] Set.empty
diff --git a/test/Main.hs b/test/Main.hs
index 57abd412..61526fba 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -164,7 +164,7 @@ testCfg isLL1 examples grammar = do
       actualMatch `shouldBe` True
   describe "generated languageG" $ do
     it "should parses with exactly one full parse" $ do
-      generated <- generate (take 100 <$> languageG grammar)
+      generated <- generate (take 100 <$> languageSample (transducerG grammar))
       for_ generated $ \word -> do
         let fullParses = [() | (_, "") <- parseG grammar word]
         fullParses `shouldBe` [()]

From 726ae7b93d848613be4d93fde1f482b9cc54e4c3 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 17:51:21 -0700
Subject: [PATCH 28/31] kleaning

---
 src/Control/Lens/Grammar.hs         |  4 ++--
 src/Control/Lens/Grammar/Machine.hs | 11 ++++++-----
 test/Main.hs                        |  8 ++++----
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs
index b442dc90..8ac3779e 100644
--- a/src/Control/Lens/Grammar.hs
+++ b/src/Control/Lens/Grammar.hs
@@ -798,7 +798,7 @@ regbnfG bnf = runGrammor bnf
 
 A transducer is a form of finite state machine,
 usable as an intermediary for further generators like
-`=~`, `expectNext`, `languageSample`, `parseForestGen` & `unreachableRules`.
+`=~`, `expectNext`, `languageSample`, `parseForest` & `unreachableRules`.
 
 >>> import Test.QuickCheck
 >>> let regexLang = languageSample @Char regexMachine
@@ -813,7 +813,7 @@ usable as an intermediary for further generators like
 
 >>> import Data.Tree (drawForest)
 
-@>>> let (forest, _) = parseForestGen regexMachine "xy|z" in putStr (drawForest (map (fmap show) forest))
+@>>> let (forest, _) = parseForest regexMachine "xy|z" in putStr (drawForest (map (fmap show) forest))
 ("regex",0,4,"xy|z")
 |
 `- ("alternate",0,4,"xy|z")
diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index f3aa9989..369af27e 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -13,7 +13,7 @@ module Control.Lens.Grammar.Machine
     Matching (..)
     -- * Transducer
   , transducer
-  , parseForestGen
+  , parseForest
   , languageSample
   , expectNext
   , unreachableRules
@@ -95,7 +95,7 @@ and McIlroy, [Enumerating the strings of regular languages]
 
 A transducer is a form of finite state machine
 that can be run in various ways like
-`=~`, `expectNext`, `languageSample`, `parseForestGen` & `unreachableRules`.
+`=~`, `expectNext`, `languageSample`, `parseForest` & `unreachableRules`.
 -}
 transducer :: Bnf (RegEx token) -> Transducer token
 transducer (Bnf start rules) = Transducer
@@ -230,13 +230,14 @@ transducer (Bnf start rules) = Transducer
             , bypass0 || bypass1
             )
 
-parseForestGen
+{- | The parse forest of a string of tokens. -}
+parseForest
   :: Categorized token
   => Transducer token
-  -> [token]
+  -> [token] -- ^ string
   -> ([Tree (String, Int, Int, [token])], [token])
   {- ^ parse forest & remaining unparsed tokens -}
-parseForestGen et word = (concat (itemForests Set.empty Nothing 0 acceptedLen 0), drop acceptedLen word)
+parseForest et word = (concat (itemForests Set.empty Nothing 0 acceptedLen 0), drop acceptedLen word)
   where
     (n, chart) = prefixGen et word
     relations = transducerRelations et
diff --git a/test/Main.hs b/test/Main.hs
index 61526fba..02d4dbbf 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -41,15 +41,15 @@ main = do
     describe "lambdaGrammar" $ testCfg True lambdaExamples lambdaGrammar
     describe "lenvecGrammar" $ testCsg True lenvecExamples lenvecGrammar
     describe "chainGrammar" $ testCfg True chainExamples chainGrammar
-    describe "parseForestGen" parseForestGenTests
+    describe "parseForest" parseForestTests
     describe "Parsector try rollback" tryRollbackTests
     describe "Kleene" kleeneProperties
     describe "meander" meanderProperties
 
-parseForestGenTests :: Spec
-parseForestGenTests = do
+parseForestTests :: Spec
+parseForestTests = do
   it "returns the nested rule forest for a full parse" $ do
-    let (actualForest, actualRest) = parseForestGen (transducerG arithGrammar) "2*3+4;;;"
+    let (actualForest, actualRest) = parseForest (transducerG arithGrammar) "2*3+4;;;"
     actualForest `shouldBe`
       [ Node ("arith", 0, 5, "2*3+4")
           [ Node ("sum", 0, 5, "2*3+4")

From 90fab479cc14525a97a20b0051b80037493729c8 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 18:08:57 -0700
Subject: [PATCH 29/31] Update Kleene.hs

---
 src/Control/Lens/Grammar/Kleene.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Control/Lens/Grammar/Kleene.hs b/src/Control/Lens/Grammar/Kleene.hs
index 85c54c1f..fe8018a5 100644
--- a/src/Control/Lens/Grammar/Kleene.hs
+++ b/src/Control/Lens/Grammar/Kleene.hs
@@ -233,7 +233,7 @@ instance TokenAlgebra Char (ReadP Char) where
   tokenClass = ReadP.satisfy . tokenClass
 instance (Categorized token, Arbitrary token) => TokenAlgebra token (Gen token) where
   tokenClass (TokenClass exam) = case exam of
-    OneOf xs -> Gen.elements (toList xs)
+    OneOf xs -> oneOf xs
     NotOneOf xs (AndAsIn cat) -> arbitrary `Gen.suchThat`
       (\x -> x `notElem` xs && categorize x == cat)
     NotOneOf xs (AndNotAsIn cats) -> arbitrary `Gen.suchThat`

From 49beb1863a0d7ed928f08eadf65d157e862612c7 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 18:18:36 -0700
Subject: [PATCH 30/31] update

---
 CHANGELOG.md       | 33 +++++++++++++++++++++++++++++++++
 distributors.cabal |  2 +-
 package.yaml       |  2 +-
 test/Main.hs       |  4 ++--
 4 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e610530f..83402d2c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,38 @@
 # Changelog for `distributors`
 
+## 0.6.0.0 - 2026-04-24
+
+### New Module
+
+- Added `Control.Lens.Grammar.Machine` as the transducer/matching runtime layer.
+
+### New Types
+
+- Added `Transducer` and `TransducerStep` as the finite-state representation for
+  compiled grammar machines.
+
+### New APIs (Machine Runtime)
+
+- Added `transducer` to compile `Bnf (RegEx token)` into `Transducer`.
+- Added `parseForest` to reconstruct parse forests with rule labels and token spans/slices,
+  returning the remaining unparsed suffix.
+- Added `expectNext` to compute scanner-frontier expected token classes after a prefix.
+- Added `languageSample` to lazily enumerate sampled language words from shortest length upward.
+- Added `unreachableRules` to report dead nonterminals unreachable from the start expression.
+
+### Internal Machinery
+
+- Implemented Thompson-style transducer construction over `RegEx`-extended BNF.
+- Implemented Earley-style chart runtime (`initialChart`, `closeChartAt`, `scanClassOptions`,
+  `prefixGen`) with predict/complete closure and scanner grouping.
+- Added completion-time caller indexing/cache optimizations and precomputed rule nullability/first-state
+  indexing to speed machine execution.
+
+### Grammar Integration
+
+- `Control.Lens.Grammar` now exposes machine-backed generators:
+  `transducerG` and the parse-forest examples/docs built on the Machine runtime.
+
 ## 0.5.0.0 - 2026-04-16
 
 ### Changes
diff --git a/distributors.cabal b/distributors.cabal
index c620f938..2794cd48 100644
--- a/distributors.cabal
+++ b/distributors.cabal
@@ -5,7 +5,7 @@ cabal-version: 2.2
 -- see: https://github.com/sol/hpack
 
 name:           distributors
-version:        0.5.0.0
+version:        0.6.0.0
 synopsis:       Unifying Parsers, Printers & Grammars
 description:    Distributors provides mathematically inspired abstractions for coders to write parsers that can also be inverted to printers.
 category:       Profunctors, Optics, Parsing
diff --git a/package.yaml b/package.yaml
index 1e652244..4319745c 100644
--- a/package.yaml
+++ b/package.yaml
@@ -1,5 +1,5 @@
 name: distributors
-version: 0.5.0.0
+version: 0.6.0.0
 github: "morphismtech/distributors"
 license: BSD-3-Clause
 author: "Eitan Chatav"
diff --git a/test/Main.hs b/test/Main.hs
index 02d4dbbf..f73df07f 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -162,8 +162,8 @@ testCfg isLL1 examples grammar = do
     it ("should match " <> expectedString <> " correctly") $ do
       let actualMatch = expectedString =~ regbnfG grammar
       actualMatch `shouldBe` True
-  describe "generated languageG" $ do
-    it "should parses with exactly one full parse" $ do
+  describe "transducerG" $ do
+    it "should generate the hundred shorted valid words in a language" $ do
       generated <- generate (take 100 <$> languageSample (transducerG grammar))
       for_ generated $ \word -> do
         let fullParses = [() | (_, "") <- parseG grammar word]

From c5813b5cecc1fea9a8f677052fd28d6f72d7c7d0 Mon Sep 17 00:00:00 2001
From: Eitan Chatav <eitan.chatav@gmail.com>
Date: Fri, 24 Apr 2026 18:38:15 -0700
Subject: [PATCH 31/31] Update Machine.hs

---
 src/Control/Lens/Grammar/Machine.hs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Control/Lens/Grammar/Machine.hs b/src/Control/Lens/Grammar/Machine.hs
index 369af27e..4f778b95 100644
--- a/src/Control/Lens/Grammar/Machine.hs
+++ b/src/Control/Lens/Grammar/Machine.hs
@@ -77,6 +77,7 @@ data Transducer token = Transducer
   , transducerRules :: Map String (IntSet, Bool)
   -- ^ an index into `transducerRelations` for nonterminals with precomputed nullability
   , transducerStarts :: IntSet
+  -- ^ an index into `transducerRelations` for the starting rule
   }
 
 -- | A `TransducerStep` in a `Transducer`.