From db25c26238862bd74be2755a9bf8ede934051bc0 Mon Sep 17 00:00:00 2001
From: Martin Di Paola <martinp.dipaola@gmail.com>
Date: Fri, 20 Mar 2026 13:09:13 -0300
Subject: [PATCH 1/7] build: hide build/

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
diff --git a/.gitignore b/.gitignore
index 591d6c5..7ce35ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ prof-traces
 test/ds/good.args
 test/autocomplete_byexample.sh
 .check-secrets-impl.sh
+build/

From 47cb9843dd5c0e9b377f50012b41e185ab8800cd Mon Sep 17 00:00:00 2001
From: Martin Di Paola <martinp.dipaola@gmail.com>
Date: Fri, 20 Mar 2026 13:32:37 -0300
Subject: [PATCH 2/7] feat: impl #275 ignoring empty lines at begin of got
 string

---
 byexample/parser.py    | 14 ++++-----
 byexample/parser_sm.py | 71 +++++++++++++++++++++++++++++++-----------
 2 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/byexample/parser.py b/byexample/parser.py
index 36dcba0..caa8e31 100644
--- a/byexample/parser.py
+++ b/byexample/parser.py
@@ -262,8 +262,8 @@ def expected_as_regexs(
 
         We return the regexs
 
-            >>> regexs
-            ('\\A', 'a', '(?P<foo>.*?)', 'b', '(?P<bar>.*?)', 'c', '\\n*\\Z')
+            >>> regexs          # byexample: +norm-ws
+            ('\\A(?:[ \\t]*\\n)*?', 'a', '(?P<foo>.*?)', 'b', '(?P<bar>.*?)', 'c', '\\n*\\Z')
 
             >>> m = re.compile(''.join(regexs), re.MULTILINE | re.DOTALL)
             >>> m.match('axxbyyyc').groups()
@@ -300,7 +300,7 @@ def expected_as_regexs(
             >>> regexs, _, _, tags_by_idx, _ = _as_regexs(expected, normalize_whitespace=True)
 
             >>> regexs          # byexample: +norm-ws
-            ('\\A', 'a', '(?:.*?)(?<!\\s)', '\\s+(?!\\s)', '(?P<foo_bar>.*?)', 'c', '\\s*\\Z')
+            ('\\A\\s*?', 'a', '(?:.*?)(?<!\\s)', '\\s+(?!\\s)', '(?P<foo_bar>.*?)', 'c', '\\s*\\Z')
 
             >>> tags_by_idx
             {2: None, 4: 'foo-bar'}
@@ -315,7 +315,7 @@ def expected_as_regexs(
             >>> regexs, _, _, tags_by_idx, _ = _as_regexs(expected)
 
             >>> regexs
-            ('\\A', 'a<foo>b<bar>c', '\\n*\\Z')
+            ('\\A(?:[ \\t]*\\n)*?', 'a<foo>b<bar>c', '\\n*\\Z')
 
             >>> tags_by_idx
             {}
@@ -324,7 +324,7 @@ def expected_as_regexs(
             >>> regexs, _, _, tags_by_idx, _ = _as_regexs(expected)
 
             >>> regexs
-            ('\\A', 'a', '(?:.*?)', 'b<bar>c', '\\n*\\Z')
+            ('\\A(?:[ \\t]*\\n)*?', 'a', '(?:.*?)', 'b<bar>c', '\\n*\\Z')
 
             >>> tags_by_idx
             {2: None}
@@ -433,7 +433,7 @@ def _extend_parser_and_parse_options_strictly_and_cache(self, optlist):
 >>> regexs, _, _, _, _ = _as_regexs(expected, normalize_whitespace=True)
 
 >>> regexs
-('\\A',
+('\\A\\s*?',
  'ex',
  '\\s',
  '(?:\\s*(?!\\s)(?:.+)(?<!\\s))?',
@@ -450,7 +450,7 @@ def _extend_parser_and_parse_options_strictly_and_cache(self, optlist):
 >>> regexs, _, _, _, _ = _as_regexs(expected, normalize_whitespace=True)
 
 >>> regexs
-('\\A',
+('\\A\\s*?',
  'ex',
  '\\s',
  '(?:\\s*(?!\\s)(?P<foo>.+?)(?<!\\s))?',
diff --git a/byexample/parser_sm.py b/byexample/parser_sm.py
index 1ee9c42..342a0ec 100644
--- a/byexample/parser_sm.py
+++ b/byexample/parser_sm.py
@@ -465,10 +465,20 @@ def expected_tokenizer(self, expected_str, tags_enabled, input_enabled):
             input_match = None
         yield (charno, 'end', None)
 
+    def _begin_of_string_regex(self):
+        r'''
+        Return the regex to match the beginning of the got string.
+
+        By default it is just the start-of-string anchor \A.
+        Subclasses can override this to also skip any leading
+        empty/whitespace-only lines in the got.
+        '''
+        return r'\A'
+
     @log_context('byexample.parser')
     def parse(self, expected, tags_enabled, input_enabled):
         self.reset()
-        self.emit(0, r'\A', 0)
+        self.emit(0, self._begin_of_string_regex(), 0)
 
         tokenizer = self.expected_tokenizer(
             expected, tags_enabled, input_enabled
@@ -671,6 +681,17 @@ def __init__(
     def trailing_whitespace_regex(self):
         return re.compile(r'\s*\Z', re.MULTILINE | re.DOTALL)
 
+    def _begin_of_string_regex(self):
+        r'''
+        Skip any leading whitespace (including empty lines) in the got.
+
+        In norm-ws mode all whitespace is equivalent, so leading
+        spaces and newlines can be freely skipped.
+        Non-greedy *? is used so that the next part of the regex
+        (e.g. a tag's \s+ lookahead) can still match the whitespace.
+        '''
+        return r'\A\s*?'
+
     def emit_ws(self, just_one=False):
         charno, _ = self.pull()
         if just_one:
@@ -816,7 +837,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> r, p, c, _, _ = _as_regexs('a  \n   b  \t\vc')
 
             >>> r
-            ('\\A', 'a', '\\s+(?!\\s)', 'b', '\\s+(?!\\s)', 'c', '\\s*\\Z')
+            ('\\A\\s*?', 'a', '\\s+(?!\\s)', 'b', '\\s+(?!\\s)', 'c', '\\s*\\Z')
 
             >>> match(r, 'a b c') is not None
             True
@@ -845,7 +866,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs
-            ('\\A', 'a', '(?P<foo>.*?)', 'b', '\\s*\\Z')
+            ('\\A\\s*?', 'a', '(?P<foo>.*?)', 'b', '\\s*\\Z')
 
             >>> p
             (0, 0, 1, 6, 7)
@@ -861,7 +882,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs               # byexample: -tags
-            ('\\A', 'a', '\\s+(?!\\s)', '(?P<foo>.*?)', 'b', '\\s*\\Z')
+            ('\\A\\s*?', 'a', '\\s+(?!\\s)', '(?P<foo>.*?)', 'b', '\\s*\\Z')
 
             >>> p
             (0, 0, 1, 2, 7, 8)
@@ -873,7 +894,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs               # byexample: -tags
-            ('\\A', 'a', '(?P<foo>.*?)(?<!\\s)', '\\s+(?!\\s)', 'b', '\\s*\\Z')
+            ('\\A\\s*?', 'a', '(?P<foo>.*?)(?<!\\s)', '\\s+(?!\\s)', 'b', '\\s*\\Z')
 
             >>> p
             (0, 0, 1, 6, 7, 8)
@@ -888,7 +909,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs           # byexample: +norm-ws -tags
-            ('\\A', 'a', '\\s', '(?:\\s*(?!\\s)(?P<foo>.+?)(?<!\\s))?', '\\s+(?!\\s)', 'b', '\\s*\\Z')
+            ('\\A\\s*?', 'a', '\\s', '(?:\\s*(?!\\s)(?P<foo>.+?)(?<!\\s))?', '\\s+(?!\\s)', 'b', '\\s*\\Z')
 
             >>> p
             (0, 0, 1, 2, 7, 8, 9)
@@ -918,7 +939,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs               # byexample: -tags
-            ('\\A', '(?P<foo>.*?)(?<!\\s)', '\\s*\\Z')
+            ('\\A\\s*?', '(?P<foo>.*?)(?<!\\s)', '\\s*\\Z')
 
             >>> p
             (0, 0, 5)
@@ -930,7 +951,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs               # byexample: -tags
-            ('\\A', '\\s', '(?:\\s*(?!\\s)(?P<foo>.+?)(?<!\\s))?', '\\s*\\Z')
+            ('\\A\\s*?', '\\s', '(?:\\s*(?!\\s)(?P<foo>.+?)(?<!\\s))?', '\\s*\\Z')
 
             >>> p
             (0, 0, 1, 6)
@@ -942,7 +963,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs               # byexample: -tags
-            ('\\A', '\\s', '(?:\\s*(?!\\s)(?P<foo>.+?)(?<!\\s))?', '\\s*\\Z')
+            ('\\A\\s*?', '\\s', '(?:\\s*(?!\\s)(?P<foo>.+?)(?<!\\s))?', '\\s*\\Z')
 
             >>> p
             (0, 0, 1, 6)
@@ -954,7 +975,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs               # byexample: -tags
-            ('\\A', '(?P<foo>.*?)(?<!\\s)', '\\s*\\Z')
+            ('\\A\\s*?', '(?P<foo>.*?)(?<!\\s)', '\\s*\\Z')
 
             >>> p
             (0, 0, 5)
@@ -966,7 +987,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs               # byexample: -tags
-            ('\\A', '\\s*\\Z')
+            ('\\A\\s*?', '\\s*\\Z')
 
             >>> p
             (0, 0)
@@ -975,7 +996,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 
             >>> regexs               # byexample: -tags
-            ('\\A', '\\s*\\Z')
+            ('\\A\\s*?', '\\s*\\Z')
 
             >>> p
             (0, 0)
@@ -989,7 +1010,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, charnos, rcounts, _, input_list = _as_regexs(expected)
 
             >>> regexs              # byexample: +norm-ws
-            ('\\A', 'username', '\\s+(?!\\s)', '\\[john\\]', '\\s+(?!\\s)',
+            ('\\A\\s*?', 'username', '\\s+(?!\\s)', '\\[john\\]', '\\s+(?!\\s)',
              'pass', '\\s+(?!\\s)', '\\[admin\\]', '\\s+(?!\\s)',
              'comment', '\\s+(?!\\s)', '\\[', '\\s+(?!\\s)', 'none', '\\s+(?!\\s)', '\\]',
              '\\s*\\Z')
@@ -1021,6 +1042,18 @@ def __init__(
     def trailing_newlines_regex(self):
         return re.compile(r'\n*\Z', re.MULTILINE | re.DOTALL)
 
+    def _begin_of_string_regex(self):
+        r'''
+        Skip any leading empty or whitespace-only lines in the got.
+
+        A non-greedy *? is used to avoid consuming lines that the
+        expected regex (e.g. a tag) may need to match itself.
+        This is safe and non-pathological: each iteration of the
+        group consumes at least one \n, so the total work is linear
+        in the number of leading blank lines.
+        '''
+        return r'\A(?:[ \t]*\n)*?'
+
     def emit_tag(self, ctx, endline):
         assert ctx in ('n', '0')
         return SM.emit_tag(self, ctx, endline)
@@ -1105,7 +1138,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, charnos, rcounts, tags_by_idx, input_list = _as_regexs(expected)
 
             >>> regexs              # byexample: -tags +norm-ws
-            ('\\A', 'a', '(?P<foo>.*?)', 'b', '(?P<b_b>.*?)', 'c', '(?:.*?)', 'd', '\\n*\\Z')
+            ('\\A(?:[ \\t]*\\n)*?', 'a', '(?P<foo>.*?)', 'b', '(?P<b_b>.*?)', 'c', '(?:.*?)', 'd', '\\n*\\Z')
 
             >>> match(regexs, 'axxbyyyczzd').groups()
             ('xx', 'yyy')
@@ -1152,7 +1185,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, _, rcounts, _, _ = _as_regexs(expected)
 
             >>> regexs          # byexample: +norm-ws -tags
-            ('\\A',
+            ('\\A(?:[ \\t]*\\n)*?',
              'a',
              '\\\n',
              '(?P<foo>.*?)',
@@ -1200,7 +1233,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, _, _, _, _ = _as_regexs(expected)
 
             >>> regexs          # byexample: -tags
-            ('\\A', '(?:(?P<foo>.+?)(?<!\\n))?', '\\n*\\Z')
+            ('\\A(?:[ \\t]*\\n)*?', '(?:(?P<foo>.+?)(?<!\\n))?', '\\n*\\Z')
 
             >>> match(regexs, '   123  \n\n\n\n').groups()
             ('   123  ',)
@@ -1209,7 +1242,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, _, _, _, _ = _as_regexs(expected)
 
             >>> regexs          # byexample: -tags
-            ('\\A', '(?:(?P<foo>.+?)(?<!\\n))?', '\\n*\\Z')
+            ('\\A(?:[ \\t]*\\n)*?', '(?:(?P<foo>.+?)(?<!\\n))?', '\\n*\\Z')
 
             >>> match(regexs, '123\n\n\n\n').groups()
             ('123',)
@@ -1218,7 +1251,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, _, _, _, _ = _as_regexs(expected)
 
             >>> regexs          # byexample: -tags
-            ('\\A', '\\\n', '(?:(?P<foo>.+?)(?<!\\n))?', '\\n*\\Z')
+            ('\\A(?:[ \\t]*\\n)*?', '\\\n', '(?:(?P<foo>.+?)(?<!\\n))?', '\\n*\\Z')
 
             >>> match(regexs, '\n123\n\n\n\n').groups()
             ('123',)
@@ -1235,7 +1268,7 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> regexs, charnos, rcounts, _, input_list = _as_regexs(expected)
 
             >>> regexs              # byexample: +norm-ws
-            ('\\A', 'username', '\\ ', '\\[john\\]', '\\\n',
+            ('\\A(?:[ \\t]*\\n)*?', 'username', '\\ ', '\\[john\\]', '\\\n',
              'pass', '\\ ', '\\[admin\\]', '\\ \\ ', '\\\n',
              'comment', '\\ ', '\\[', '\\ ', 'none', '\\ ', '\\]',
              '\\n*\\Z')

From 0d74009cde5bd2db81efd4c9e108b42e8c096c02 Mon Sep 17 00:00:00 2001
From: Martin Di Paola <martinp.dipaola@gmail.com>
Date: Fri, 20 Mar 2026 13:32:56 -0300
Subject: [PATCH 3/7] docs: doc+test about empty lines being ignored

---
 docs/basic/normalize-whitespace.md | 76 ++++++++++++++++++++++++++++++
 test/bad-empty-line.md             |  8 ++++
 2 files changed, 84 insertions(+)
 create mode 100644 test/bad-empty-line.md

diff --git a/docs/basic/normalize-whitespace.md b/docs/basic/normalize-whitespace.md
index d512dfc..50d7527 100644
--- a/docs/basic/normalize-whitespace.md
+++ b/docs/basic/normalize-whitespace.md
@@ -1,3 +1,11 @@
+<!--
+Check that we have byexample installed first
+$ hash byexample                                    # byexample: +fail-fast
+
+$ alias byexample=byexample\ --pretty\ none
+
+--
+-->
 # Normalize Whitespace
 
 Replace any sequence of whitespace by a single one.
@@ -35,3 +43,71 @@ Here is another example, this time written in ``Ruby``:
    10,  11, 12, 13, 14, 15, 16, 17, 18, 19]
 ```
 
+## Empty lines at the begin are ignored by default
+
+
+Consider the following `"\n  \nSome line"` output. The following three
+examples match because by default `byexample` discards any empty line
+at the beginning of the output.
+
+```python
+>>> someline = "\n  \nSome line"
+
+>>> print(someline)  # OK: <...> captures the empty lines
+<...>
+Some line
+
+>>> print(someline)  # OK too: the same reason above
+<...>Some line
+
+>>> print(someline)  # OK: byexample ignores the empty lines "as if" a <...> was there
+Some line
+```
+
+`byexample` treats as "empty lines" those lines made entirely of spaces
+and tabs and ended with a newline. It is subtle, but this definition
+does not include indentation.
+
+Consider the following `"\n  \n  Some indented line"`:
+
+```python
+>>> someindented = "\n  \n  Some indented line"
+
+>>> print(someindented)  # FAIL: the example is not expecting indentation    # byexample: +pass
+<...>
+Some indented line
+
+>>> print(someindented)  # OK: <...> captures all including the indentation
+<...>Some indented line
+
+>>> print(someindented)  # FAIL: byexample ignores the empty lines but not the indentation   # byexample: +pass
+Some indented line
+```
+
+When `+norm-ws` is enabled, those two `FAIL` examples will pass because
+`byexample` relaxes the definition of empty lines to
+"any whitespace", which includes the indentation:
+
+```python
+>>> print(someindented) # byexample: +norm-ws
+<...>
+Some indented line
+
+>>> print(someindented)  # byexample: +norm-ws
+Some indented line
+```
+
+<!--
+
+Test a few more combinations
+
+>>> print(someindented) # byexample: +norm-ws
+<...>Some indented line
+
+Test the incorrect combinations and check that they are actually failing
+
+$ byexample -l python test/bad-empty-line.md
+<...>
+[FAIL] Pass: 0 Fail: 2 Skip: 0
+
+-->
diff --git a/test/bad-empty-line.md b/test/bad-empty-line.md
new file mode 100644
index 0000000..6f1efd3
--- /dev/null
+++ b/test/bad-empty-line.md
@@ -0,0 +1,8 @@
+```python
+>>> print("\n  \n  Some line") # should fail (missing indentation)
+<...>
+Some line
+
+>>> print("\n  \n  Some line") # should fail (missing indentation)
+Some line
+```

From 20b7922693070141d14ff81f8af268732a283ff7 Mon Sep 17 00:00:00 2001
From: Martin Di Paola <martinp.dipaola@gmail.com>
Date: Fri, 20 Mar 2026 22:27:43 -0300
Subject: [PATCH 4/7] fix: fix a quadratic case

---
 byexample/parser_sm.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/byexample/parser_sm.py b/byexample/parser_sm.py
index 342a0ec..2de7727 100644
--- a/byexample/parser_sm.py
+++ b/byexample/parser_sm.py
@@ -689,6 +689,11 @@ def _begin_of_string_regex(self):
         spaces and newlines can be freely skipped.
         Non-greedy *? is used so that the next part of the regex
         (e.g. a tag's \s+ lookahead) can still match the whitespace.
+
+        Note: when the first content token is itself a whitespace transition
+        (\s+(?!\s)), emit_ws() folds \A\s*? + \s+(?!\s) into the single
+        equivalent \A\s+(?!\s) to avoid a quadratic interaction between the
+        two overlapping whitespace quantifiers.
         '''
         return r'\A\s*?'
 
@@ -698,6 +703,15 @@ def emit_ws(self, just_one=False):
             rx = r'\s'
         else:
             rx = r'\s+(?!\s)'
+            # \A\s*? + \s+(?!\s) is quadratic: both parts match whitespace
+            # and the engine explores O(n^2) splits on whitespace-only strings.
+            # \A\s*?\s+(?!\s) is semantically equivalent to \A\s+(?!\s)
+            # (both match one-or-more whitespace anchored at start, stopping
+            # before non-whitespace), so fold them when this is the first
+            # content emit (only the \A\s*? anchor is in results so far).
+            if len(self.results) == 1:
+                self.results.pop()
+                rx = r'\A' + rx
         rc = 1
 
         self.record_input_event(charno, 'prefix', ' ', rx, rc)
@@ -983,6 +997,32 @@ def parse(self, expected, tags_enabled, input_enabled):
             >>> match(regexs, '   123  \n\n\n\n').groups()
             ('   123',)
 
+            When the expected starts with whitespace, \A\s*? + \s+(?!\s) would
+            be quadratic on whitespace-only got strings.  They are folded into
+            the single equivalent \A\s+(?!\s) which is linear (greedy, no
+            overlap).
+
+            >>> expected = '  foo'
+            >>> regexs, p, _, _, _ = _as_regexs(expected)
+
+            >>> regexs               # byexample: -tags
+            ('\\A\\s+(?!\\s)', 'foo', '\\s*\\Z')
+
+            >>> p
+            (0, 2, 5)
+
+            This still skips any extra leading whitespace in the got, just
+            like \A\s*?\s+(?!\s) would, because \s+ is greedy from \A.
+
+            >>> match(regexs, '\n\n  foo').groups()
+            ()
+
+            >>> match(regexs, '  foo').groups()
+            ()
+
+            >>> match(regexs, 'foo') is None
+            True
+
             >>> expected = ' '
             >>> regexs, p, _, _, _ = _as_regexs(expected)
 

From f46a3ca47cb9da651b084c012afd956924dc1f62 Mon Sep 17 00:00:00 2001
From: Martin Di Paola <martinp.dipaola@gmail.com>
Date: Fri, 20 Mar 2026 23:52:53 -0300
Subject: [PATCH 5/7] feat: impl +ignore-first-empty-lines (default true)

---
 byexample/example.py   |  1 +
 byexample/expected.py  |  4 +--
 byexample/finder.py    |  3 +-
 byexample/init.py      |  6 ++++
 byexample/parser.py    | 30 ++++++++++++++---
 byexample/parser_sm.py | 73 +++++++++++++++++++++++++++++-------------
 6 files changed, 87 insertions(+), 30 deletions(-)

diff --git a/byexample/example.py b/byexample/example.py
index 39494a2..5dc32c8 100644
--- a/byexample/example.py
+++ b/byexample/example.py
@@ -82,6 +82,7 @@ class Example(object):
 
     >>> example.options
     {'capture': True,
+     'ignore_first_empty_lines': True,
      'input_prefix_range': (6, 12),
      'norm_ws': False,
      'rm': [],
diff --git a/byexample/expected.py b/byexample/expected.py
index 5544d36..161093f 100644
--- a/byexample/expected.py
+++ b/byexample/expected.py
@@ -32,7 +32,7 @@ class _LinearExpected(Expected):
         >>> from byexample.options import Options
         >>> from byexample.finder import _build_fake_example as build_example
 
-        >>> opts = {'norm_ws': False, 'tags': True, 'capture': True, 'rm': [], 'type': False, 'input_prefix_range': (6,12)}
+        >>> opts = {'norm_ws': False, 'tags': True, 'capture': True, 'rm': [], 'type': False, 'input_prefix_range': (6,12), 'ignore_first_empty_lines': True}
 
         Consider the following example with a named capture in the expected:
 
@@ -145,7 +145,7 @@ class _LinearExpected(Expected):
 
         (See byexample.parser docs)
 
-        >>> opts = {'norm_ws': True, 'tags': True, 'capture': True, 'rm': [], 'type': False, 'input_prefix_range': (6, 12)}
+        >>> opts = {'norm_ws': True, 'tags': True, 'capture': True, 'rm': [], 'type': False, 'input_prefix_range': (6, 12), 'ignore_first_empty_lines': True}
         >>> ex = build_example('f()', '\n  <a>A \n\nB <bc> C\n<c>', opts=opts)
         >>> exp = ex.expected
 
diff --git a/byexample/finder.py b/byexample/finder.py
index 33b2c41..89469e1 100644
--- a/byexample/finder.py
+++ b/byexample/finder.py
@@ -51,7 +51,8 @@ class F:
                 'capture': True,
                 'rm': [],
                 'type': False,
-                'input_prefix_range': (6, 12)
+                'input_prefix_range': (6, 12),
+                'ignore_first_empty_lines': True,
             }
         )
     parser.extract_options = lambda x: opts
diff --git a/byexample/init.py b/byexample/init.py
index bb5cc13..e3dd679 100644
--- a/byexample/init.py
+++ b/byexample/init.py
@@ -386,6 +386,12 @@ def get_default_options_parser(cmdline_args):
     options_parser.add_flag(
         "norm-ws", default=False, help="ignore the amount of whitespaces."
     )
+    options_parser.add_flag(
+        "ignore-first-empty-lines",
+        default=True,
+        help=
+        "ignore any empty or whitespace-only lines at the begin of the got string."
+    )
     options_parser.add_flag(
         "pass",
         default=False,
diff --git a/byexample/parser.py b/byexample/parser.py
index caa8e31..b57e16e 100644
--- a/byexample/parser.py
+++ b/byexample/parser.py
@@ -193,7 +193,8 @@ def parse(self, example, concerns):
         input_prefix_len_range = options['input_prefix_range']
         expected_regexs, charnos, rcounts, tags_by_idx, input_list = self.expected_as_regexs(
             example.expected_str, options['tags'], options['capture'],
-            options['type'], options['norm_ws'], input_prefix_len_range
+            options['type'], options['norm_ws'], input_prefix_len_range,
+            options['ignore_first_empty_lines']
         )
 
         ExpectedClass = _LinearExpected
@@ -230,8 +231,14 @@ def parse(self, example, concerns):
 
     @profile
     def expected_as_regexs(
-        self, expected, tags_enabled, capture_enabled, input_enabled,
-        normalize_whitespace, input_prefix_len_range
+        self,
+        expected,
+        tags_enabled,
+        capture_enabled,
+        input_enabled,
+        normalize_whitespace,
+        input_prefix_len_range,
+        ignore_first_empty_lines=True
     ):
         r'''
         From the expected string create a list of regular expressions that
@@ -329,6 +336,19 @@ def expected_as_regexs(
             >>> tags_by_idx
             {2: None}
 
+        When ignore_first_empty_lines is False the begin anchor is a plain \\A,
+        so the got string must start exactly at the first expected character.
+
+            >>> regexs, _, _, _, _ = _as_regexs('foo', ignore_first_empty_lines=False)
+
+            >>> regexs
+            ('\\A', 'foo', '\\n*\\Z')
+
+            >>> regexs, _, _, _, _ = _as_regexs('foo', normalize_whitespace=True, ignore_first_empty_lines=False)
+
+            >>> regexs
+            ('\\A', 'foo', '\\s*\\Z')
+
         '''
         if capture_enabled:
             tag_regexs = self.tag_regexs()
@@ -338,12 +358,12 @@ def expected_as_regexs(
         if normalize_whitespace:
             sm = SM_NormWS(
                 tag_regexs, self.input_regexs(), self.ellipsis_marker(),
-                input_prefix_len_range
+                input_prefix_len_range, ignore_first_empty_lines
             )
         else:
             sm = SM_NotNormWS(
                 tag_regexs, self.input_regexs(), self.ellipsis_marker(),
-                input_prefix_len_range
+                input_prefix_len_range, ignore_first_empty_lines
             )
 
         return sm.parse(expected, tags_enabled, input_enabled)
diff --git a/byexample/parser_sm.py b/byexample/parser_sm.py
index 2de7727..ccb64e9 100644
--- a/byexample/parser_sm.py
+++ b/byexample/parser_sm.py
@@ -37,7 +37,12 @@
 
 class SM(object):
     def __init__(
-        self, tag_regexs, input_regexs, ellipsis_marker, input_prefix_len_range
+        self,
+        tag_regexs,
+        input_regexs,
+        ellipsis_marker,
+        input_prefix_len_range,
+        ignore_first_empty_lines=True
     ):
         self.tag_regex = tag_regexs.for_capture
         self.tag_split_regex = tag_regexs.for_split
@@ -49,6 +54,8 @@ def __init__(
         self.input_prefix_min_len, self.input_prefix_max_len = input_prefix_len_range
         assert self.input_prefix_min_len <= self.input_prefix_max_len
 
+        self.ignore_first_empty_lines = ignore_first_empty_lines
+
         self.reset()
 
     def reset(self):
@@ -670,11 +677,16 @@ def build_prefix(self, partial_prefixes):
 
 class SM_NormWS(SM):
     def __init__(
-        self, tag_regexs, input_regexs, ellipsis_marker, input_prefix_len_range
+        self,
+        tag_regexs,
+        input_regexs,
+        ellipsis_marker,
+        input_prefix_len_range,
+        ignore_first_empty_lines=True
     ):
         SM.__init__(
             self, tag_regexs, input_regexs, ellipsis_marker,
-            input_prefix_len_range
+            input_prefix_len_range, ignore_first_empty_lines
         )
 
     @constant
@@ -683,19 +695,25 @@ def trailing_whitespace_regex(self):
 
     def _begin_of_string_regex(self):
         r'''
-        Skip any leading whitespace (including empty lines) in the got.
+        If ignore_first_empty_lines is True (the default), skip any leading
+        whitespace (including empty lines) in the got before matching content.
+
+        In norm-ws mode all whitespace is equivalent, so leading spaces and
+        newlines can be freely skipped.  Non-greedy *? is used so that the
+        next part of the regex (e.g. a tag's \s+ lookahead) can still match
+        the whitespace.
 
-        In norm-ws mode all whitespace is equivalent, so leading
-        spaces and newlines can be freely skipped.
-        Non-greedy *? is used so that the next part of the regex
-        (e.g. a tag's \s+ lookahead) can still match the whitespace.
+        If ignore_first_empty_lines is False, use a plain \A anchor so that
+        the got string must start exactly where the expected content begins.
 
         Note: when the first content token is itself a whitespace transition
-        (\s+(?!\s)), emit_ws() folds \A\s*? + \s+(?!\s) into the single
-        equivalent \A\s+(?!\s) to avoid a quadratic interaction between the
+        (\s+(?!\s)), emit_ws() folds the begin anchor + \s+(?!\s) into the
+        single equivalent \A\s+(?!\s) to avoid a quadratic interaction between
         two overlapping whitespace quantifiers.
         '''
-        return r'\A\s*?'
+        if self.ignore_first_empty_lines:
+            return r'\A\s*?'
+        return r'\A'
 
     def emit_ws(self, just_one=False):
         charno, _ = self.pull()
@@ -703,13 +721,14 @@ def emit_ws(self, just_one=False):
             rx = r'\s'
         else:
             rx = r'\s+(?!\s)'
-            # \A\s*? + \s+(?!\s) is quadratic: both parts match whitespace
+            # If self.ignore_first_empty_lines is True,
+            # the \A\s*? + \s+(?!\s) is quadratic: both parts match whitespace
             # and the engine explores O(n^2) splits on whitespace-only strings.
             # \A\s*?\s+(?!\s) is semantically equivalent to \A\s+(?!\s)
             # (both match one-or-more whitespace anchored at start, stopping
             # before non-whitespace), so fold them when this is the first
             # content emit (only the \A\s*? anchor is in results so far).
-            if len(self.results) == 1:
+            if len(self.results) == 1 and self.ignore_first_empty_lines:
                 self.results.pop()
                 rx = r'\A' + rx
         rc = 1
@@ -1071,11 +1090,16 @@ def parse(self, expected, tags_enabled, input_enabled):
 
 class SM_NotNormWS(SM):
     def __init__(
-        self, tag_regexs, input_regexs, ellipsis_marker, input_prefix_len_range
+        self,
+        tag_regexs,
+        input_regexs,
+        ellipsis_marker,
+        input_prefix_len_range,
+        ignore_first_empty_lines=True
     ):
         SM.__init__(
             self, tag_regexs, input_regexs, ellipsis_marker,
-            input_prefix_len_range
+            input_prefix_len_range, ignore_first_empty_lines
         )
 
     @constant
@@ -1084,15 +1108,20 @@ def trailing_newlines_regex(self):
 
     def _begin_of_string_regex(self):
         r'''
-        Skip any leading empty or whitespace-only lines in the got.
+        If ignore_first_empty_lines is True (the default), skip any leading
+        empty or whitespace-only lines in the got before matching content.
 
-        A non-greedy *? is used to avoid consuming lines that the
-        expected regex (e.g. a tag) may need to match itself.
-        This is safe and non-pathological: each iteration of the
-        group consumes at least one \n, so the total work is linear
-        in the number of leading blank lines.
+        A non-greedy *? is used to avoid consuming lines that the expected
+        regex (e.g. a tag) may need to match itself.  This is safe and
+        non-pathological: each iteration of the group consumes at least one
+        \n, so the total work is linear in the number of leading blank lines.
+
+        If ignore_first_empty_lines is False, use a plain \A anchor so that
+        the got string must start exactly where the expected content begins.
         '''
-        return r'\A(?:[ \t]*\n)*?'
+        if self.ignore_first_empty_lines:
+            return r'\A(?:[ \t]*\n)*?'
+        return r'\A'
 
     def emit_tag(self, ctx, endline):
         assert ctx in ('n', '0')

From 6013ca0f16948b2962cabefa0b4f39ddfc695949 Mon Sep 17 00:00:00 2001
From: Martin Di Paola <martinp.dipaola@gmail.com>
Date: Fri, 20 Mar 2026 23:56:34 -0300
Subject: [PATCH 6/7] fix: broken test

---
 docs/contrib/how-to-support-new-finders-and-languages.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/contrib/how-to-support-new-finders-and-languages.md b/docs/contrib/how-to-support-new-finders-and-languages.md
index ceab692..ea23a1a 100644
--- a/docs/contrib/how-to-support-new-finders-and-languages.md
+++ b/docs/contrib/how-to-support-new-finders-and-languages.md
@@ -281,7 +281,7 @@ the scenes so you do not to be worry about the details):
 
 ```python
 >>> from byexample.options import Options, OptionParser
->>> parser = ArnoldCParser(cfg=Config(verbosity=0, encoding='utf-8', options=Options(rm=[], norm_ws=False, tags=True, capture=True, type=False, input_prefix_range=(6,12), optparser=OptionParser(add_help=False))))
+>>> parser = ArnoldCParser(cfg=Config(verbosity=0, encoding='utf-8', options=Options(rm=[], norm_ws=False, tags=True, capture=True, type=False, input_prefix_range=(6,12), ignore_first_empty_lines=True, optparser=OptionParser(add_help=False))))
 
 >>> from byexample.finder import Example
 >>> runner = None # not yet

From 7cf3ff82b7817b44453c20dd456be1f1d51300fc Mon Sep 17 00:00:00 2001
From: Martin Di Paola <martinp.dipaola@gmail.com>
Date: Fri, 20 Mar 2026 23:56:50 -0300
Subject: [PATCH 7/7] docs: doc -ignore-first-empty-lines option (+test)

---
 docs/basic/normalize-whitespace.md | 8 +++++++-
 test/bad-empty-line.md             | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/docs/basic/normalize-whitespace.md b/docs/basic/normalize-whitespace.md
index 50d7527..d888fc8 100644
--- a/docs/basic/normalize-whitespace.md
+++ b/docs/basic/normalize-whitespace.md
@@ -97,6 +97,12 @@ Some indented line
 Some indented line
 ```
 
+> *New* in `byexample 11.0.0`: before `11.0.0` it was up to the user to
+> put a `<...>` or similar to ignore the empty lines at the beginning (or
+> use `+rm=~` combined with `+norm-ws`).
+> Since `11.0.0` this is the default. If you want the old behavior you
+> can use the flag `-ignore-first-empty-lines`.
+
 <!--
 
 Test a few more combinations
@@ -108,6 +114,6 @@ Test the incorrect combinations and check that they are actually failing
 
 $ byexample -l python test/bad-empty-line.md
 <...>
-[FAIL] Pass: 0 Fail: 2 Skip: 0
+[FAIL] Pass: 0 Fail: 3 Skip: 0
 
 -->
diff --git a/test/bad-empty-line.md b/test/bad-empty-line.md
index 6f1efd3..4eb9f14 100644
--- a/test/bad-empty-line.md
+++ b/test/bad-empty-line.md
@@ -5,4 +5,7 @@ Some line
 
 >>> print("\n  \n  Some line") # should fail (missing indentation)
 Some line
+
+>>> print("\n  \nSome line")  # should fail because we are using pre-11.0.0 behavior  # byexample: -ignore-first-empty-lines
+Some line
 ```