From 07872466086c03d62c44abdfc14311e975ad9eff Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 20:21:18 +0200
Subject: [PATCH 01/13] test: add golden snapshot tests for current parseTokens
 behavior

---
 .../ParseTokensGoldenTests.swift              | 147 ++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift

diff --git a/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift b/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
new file mode 100644
index 0000000..bd8e949
--- /dev/null
+++ b/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
@@ -0,0 +1,147 @@
+//
+//  ParseTokensGoldenTests.swift
+//  MarkdownEngineTests
+//
+//  Locks the current public behavior of MarkdownTokenizer.parseTokens.
+//  Refactors must keep these green; new features add new fixtures.
+//
+//  Block-precedence tests (no emphasis / wiki-link inside fenced code) live in
+//  the Phase-1 integration suite (ParseTokensBlockPhaseIntegrationTests), not
+//  here — those assertions describe the post-refactor behavior; the baseline
+//  snapshot must lock what the current regex parser actually emits.
+//
+
+import Testing
+import Foundation
+@testable import MarkdownEngine
+
+@Suite("parseTokens golden output")
+struct ParseTokensGoldenTests {
+
+    // MARK: Headings
+
+    @Test func atxHeadingsAllSixLevels() {
+        let text = """
+        # H1
+        ## H2
+        ### H3
+        #### H4
+        ##### H5
+        ###### H6
+        """
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let headings = tokens.filter { $0.kind == .heading }
+        #expect(headings.count == 6)
+    }
+
+    @Test func headingFollowedByParagraphHasNoOverlap() throws {
+        let text = "# Title\n\nBody text\n"
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let headings = tokens.filter { $0.kind == .heading }
+        #expect(headings.count == 1)
+        let heading = try #require(headings.first) // #expect keeps running on failure; stop here instead of trapping on an empty array
+        #expect(NSMaxRange(heading.range) <= 7) // heading must end within "# Title" (7 UTF-16 units)
+    }
+
+    // MARK: Fenced code blocks
+
+    @Test func fencedCodeBlockWithLanguageProducesCodeBlockToken() {
+        let text = """
+        ```swift
+        let x = 42
+        ```
+        """
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let code = tokens.filter { $0.kind == .codeBlock }
+        #expect(code.count == 1)
+    }
+
+    // MARK: Inline (within paragraphs)
+
+    @Test func boldEmphasisInParagraph() {
+        let text = "This is **bold** text."
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let bold = tokens.filter { $0.kind == .bold }
+        #expect(bold.count == 1)
+    }
+
+    @Test func italicEmphasisInParagraph() {
+        let text = "This is *italic* text."
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let italic = tokens.filter { $0.kind == .italic }
+        #expect(italic.count == 1)
+    }
+
+    @Test func wikiLinkInParagraph() {
+        let text = "See [[Other Note]] for more."
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let wiki = tokens.filter { $0.kind == .wikiLink }
+        #expect(wiki.count == 1)
+    }
+
+    @Test func imageEmbedInParagraph() {
+        let text = "Look ![[picture.png]] here."
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let img = tokens.filter { $0.kind == .imageEmbed }
+        #expect(img.count == 1)
+    }
+
+    @Test func inlineCodeInParagraph() {
+        let text = "Call `foo()` to do it."
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let code = tokens.filter { $0.kind == .inlineCode }
+        #expect(code.count == 1)
+    }
+
+    @Test func markdownLinkInParagraph() {
+        let text = "Visit [Apple](https://apple.com) today."
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let link = tokens.filter { $0.kind == .link }
+        #expect(link.count == 1)
+    }
+
+    // MARK: Mixed
+
+    @Test func mixedContentPreservesAllTokenKinds() {
+        let text = """
+        # Heading with **bold**
+
+        Paragraph with *italic*, `code`, and [[wiki]].
+
+        ```swift
+        let x = 1
+        ```
+
+        Trailing paragraph.
+        """
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        #expect(tokens.contains { $0.kind == .heading })
+        #expect(tokens.contains { $0.kind == .bold })
+        #expect(tokens.contains { $0.kind == .italic })
+        #expect(tokens.contains { $0.kind == .inlineCode })
+        #expect(tokens.contains { $0.kind == .wikiLink })
+        #expect(tokens.contains { $0.kind == .codeBlock })
+    }
+
+    // MARK: Edge cases
+
+    @Test func emptyDocumentReturnsNoTokens() {
+        let tokens = MarkdownTokenizer.parseTokens(in: "")
+        #expect(tokens.isEmpty)
+    }
+
+    @Test func whitespaceOnlyDocumentReturnsNoTokens() {
+        let tokens = MarkdownTokenizer.parseTokens(in: "\n\n   \n")
+        #expect(tokens.isEmpty)
+    }
+
+    @Test func unclosedFencedCodeIsNotTokenizedAsCodeBlock() {
+        // Current behavior: the codeBlockRegex requires a closing fence.
+        let text = """
+        ```swift
+        let x = 1
+        """
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        #expect(tokens.filter { $0.kind == .codeBlock }.isEmpty)
+    }
+}

From aedb75674032ef8673a8cb440444fa90a3e1cdea Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 20:26:43 +0200
Subject: [PATCH 02/13] feat(parser): add BlockSpan / BlockKind / LinkReference
 data model

---
 Sources/MarkdownEngine/Parser/BlockSpan.swift | 121 ++++++++++++++++++
 .../MarkdownEngineTests/BlockSpanTests.swift  |  44 +++++++
 2 files changed, 165 insertions(+)
 create mode 100644 Sources/MarkdownEngine/Parser/BlockSpan.swift
 create mode 100644 Tests/MarkdownEngineTests/BlockSpanTests.swift

diff --git a/Sources/MarkdownEngine/Parser/BlockSpan.swift b/Sources/MarkdownEngine/Parser/BlockSpan.swift
new file mode 100644
index 0000000..db86790
--- /dev/null
+++ b/Sources/MarkdownEngine/Parser/BlockSpan.swift
@@ -0,0 +1,121 @@
+//
+//  BlockSpan.swift
+//  MarkdownEngine
+//
+//  Data model for the block phase of the two-phase Markdown parser
+//  (CommonMark §3, Appendix A). A `BlockSpan` is a typed range over the
+//  source that the block scanner emits; the inline parser runs over each
+//  span's `contentRange` to fill in inline structure.
+//
+//  Phase-1 spans are flat (children always empty). Phase-2 will populate
+//  `children` for container blocks (blockquote, list item, etc.).
+//
+
+import Foundation
+
+/// Kind of block-level construct found in the source.
+///
+/// Cases marked "Phase 2" are forward-declared so adding them later
+/// requires no API break in code that switches over `BlockKind`.
+enum BlockKind: Equatable {
+    // Phase 1
+    case paragraph
+    case heading(level: Int)             // 1...6, ATX or Setext
+    case fencedCode(language: String?)
+    case thematicBreak
+    case list(ordered: Bool)
+    case listItem(indentColumns: Int)
+    case linkReferenceDefinition(label: String)
+
+    // Phase 2 — forward-declared, not emitted by Phase-1 scanner
+    case blockquote
+    case table
+    case tableRow
+    case tableCell(alignment: TableCellAlignment)
+    case footnoteDefinition(label: String)
+    case definitionList
+    case htmlBlock
+}
+
+enum TableCellAlignment: Equatable {
+    case none
+    case left
+    case center
+    case right
+}
+
+/// One block-level element in the source.
+///
+/// - `range`: full source range including any markers / fences.
+/// - `contentRange`: substring that the inline phase processes
+///   (e.g. text after `# ` for a heading, body between fences for code).
+/// - `markerRanges`: ranges of opening/closing markers (e.g. `#` for ATX,
+///   the two ``` lines for fenced code). Used by stylers to hide / dim markers.
+/// - `children`: nested blocks for container kinds. Always empty in Phase 1.
+struct BlockSpan: Equatable {
+    let kind: BlockKind
+    let range: NSRange
+    let contentRange: NSRange
+    let markerRanges: [NSRange]
+    var children: [BlockSpan]
+
+    init(
+        kind: BlockKind,
+        range: NSRange,
+        contentRange: NSRange,
+        markerRanges: [NSRange] = [],
+        children: [BlockSpan] = []
+    ) {
+        self.kind = kind
+        self.range = range
+        self.contentRange = contentRange
+        self.markerRanges = markerRanges
+        self.children = children
+    }
+}
+
+extension BlockKind {
+    /// `true` when the inline phase should tokenize this block's `contentRange`.
+    /// Returns `false` for fenced code, thematic breaks, link reference and footnote
+    /// definitions, HTML blocks, and the `list` / `table` / `tableRow` kinds.
+    var allowsInlineContent: Bool {
+        switch self {
+        case .paragraph, .heading, .blockquote, .listItem, .tableCell, .definitionList:
+            return true
+        case .fencedCode, .thematicBreak, .linkReferenceDefinition, .htmlBlock,
+             .list, .table, .tableRow, .footnoteDefinition:
+            return false
+        }
+    }
+}
+
+/// A `[label]: url "title"` definition collected during the block phase.
+/// Phase 3 (inline AST) will consume the map to resolve reference-style
+/// links like `[text][label]` and `![alt][label]`.
+struct LinkReference: Equatable {
+    let label: String                 // raw label as written
+    let url: String                   // link destination
+    let title: String?                // optional link title; `nil` when absent
+
+    init(label: String, url: String, title: String? = nil) {
+        self.label = label
+        self.url = url
+        self.title = title
+    }
+
+    /// Lookup key for `label`: whitespace runs collapsed to one space, ends
+    /// trimmed, then Unicode case-folded (CommonMark folds; it does not lowercase).
+    var normalizedLabel: String {
+        let collapsed = label
+            .components(separatedBy: .whitespacesAndNewlines)
+            .filter { !$0.isEmpty }
+            .joined(separator: " ")
+        return collapsed.folding(options: .caseInsensitive, locale: nil)
+    }
+}
+
+/// Output of the block phase.
+struct BlockScanResult: Equatable {
+    let blocks: [BlockSpan]
+    let linkReferences: [String: LinkReference]  // keyed by `normalizedLabel`
+}
diff --git a/Tests/MarkdownEngineTests/BlockSpanTests.swift b/Tests/MarkdownEngineTests/BlockSpanTests.swift
new file mode 100644
index 0000000..59ff441
--- /dev/null
+++ b/Tests/MarkdownEngineTests/BlockSpanTests.swift
@@ -0,0 +1,44 @@
+//
+//  BlockSpanTests.swift
+//  MarkdownEngineTests
+//
+
+import Testing
+import Foundation
+@testable import MarkdownEngine
+
+@Suite("BlockSpan data model")
+struct BlockSpanTests {
+
+    @Test func leafBlockHasEmptyChildrenByDefault() {
+        let span = BlockSpan(
+            kind: .paragraph,
+            range: NSRange(location: 0, length: 5),
+            contentRange: NSRange(location: 0, length: 5),
+            markerRanges: []
+        )
+        #expect(span.children.isEmpty)
+    }
+
+    @Test func headingKindCarriesLevel() {
+        let kind: BlockKind = .heading(level: 2)
+        if case .heading(let level) = kind {
+            #expect(level == 2)
+        } else {
+            Issue.record("Expected heading kind")
+        }
+    }
+
+    @Test func linkReferenceHoldsLabelUrlAndTitle() {
+        let ref = LinkReference(label: "foo", url: "https://example.com", title: "Example")
+        #expect(ref.label == "foo")
+        #expect(ref.url == "https://example.com")
+        #expect(ref.title == "Example")
+    }
+
+    @Test func linkReferenceLabelLowercasedKeyMatchesSpec() {
+        // CommonMark folds label case and collapses whitespace for matching; `normalizedLabel` computes that key on demand from the raw `label`.
+        let ref = LinkReference(label: "  Foo  Bar  ", url: "x")
+        #expect(ref.normalizedLabel == "foo bar")
+    }
+}

From edf2e56f3f3db057576865ba9b29ab03dd4c5c88 Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 20:31:49 +0200
Subject: [PATCH 03/13] feat(parser): BlockScanner skeleton with paragraph +
 ATX heading support

---
 .../MarkdownEngine/Parser/BlockScanner.swift  | 178 ++++++++++++++++++
 .../BlockScannerTests.swift                   | 100 ++++++++++
 2 files changed, 278 insertions(+)
 create mode 100644 Sources/MarkdownEngine/Parser/BlockScanner.swift
 create mode 100644 Tests/MarkdownEngineTests/BlockScannerTests.swift

diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
new file mode 100644
index 0000000..d89cd95
--- /dev/null
+++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
@@ -0,0 +1,178 @@
+//
+//  BlockScanner.swift
+//  MarkdownEngine
+//
+//  Phase-1 block-level Markdown parser. Walks the source line-by-line,
+//  classifies each line, and emits `[BlockSpan]` plus a link-reference
+//  map. The inline parser (MarkdownTokenizer.parseTokens) runs over the
+//  content substring of each inline-allowing block.
+//
+//  Paragraph emission is buffered so Setext heading lookahead can rewrite
+//  the buffered paragraph into a heading when the next line is an
+//  underline (===, ---).
+//
+
+import Foundation
+
+enum BlockScanner {
+
+    /// Single entry point: classify all blocks in `text`.
+    static func scan(_ text: String) -> BlockScanResult {
+        let nsText = text as NSString
+        let length = nsText.length
+        guard length > 0 else { return BlockScanResult(blocks: [], linkReferences: [:]) }
+
+        var state = ScannerState(nsText: nsText)
+        var lineStart = 0
+
+        while lineStart < length {
+            let lineEnd = nextLineEnd(in: nsText, from: lineStart, length: length)
+            let lineRange = NSRange(location: lineStart, length: lineEnd - lineStart)
+            classifyLine(lineRange: lineRange, state: &state)
+            lineStart = lineEnd
+        }
+
+        state.flushBufferedParagraph()
+        return BlockScanResult(blocks: state.blocks, linkReferences: state.linkReferences)
+    }
+
+    // MARK: - Internal state
+
+    private struct ScannerState {
+        let nsText: NSString
+        var blocks: [BlockSpan] = []
+        var linkReferences: [String: LinkReference] = [:]
+        /// Buffered paragraph lines awaiting commit (Setext-heading lookahead).
+        var paragraphBuffer: [NSRange] = []
+
+        mutating func appendParagraphLine(_ lineRange: NSRange) {
+            paragraphBuffer.append(lineRange)
+        }
+
+        mutating func flushBufferedParagraph() {
+            guard let first = paragraphBuffer.first, let last = paragraphBuffer.last else { return }
+            let range = NSRange(location: first.location,
+                                length: NSMaxRange(last) - first.location)
+            blocks.append(BlockSpan(
+                kind: .paragraph,
+                range: range,
+                contentRange: range,
+                markerRanges: []
+            ))
+            paragraphBuffer.removeAll(keepingCapacity: true)
+        }
+    }
+
+    // MARK: - Line iteration
+
+    /// End of the line that starts at `start`, including the trailing newline.
+    private static func nextLineEnd(in nsText: NSString, from start: Int, length: Int) -> Int {
+        var i = start
+        while i < length {
+            let c = nsText.character(at: i)
+            if c == 0x0A {           // LF
+                return i + 1
+            }
+            if c == 0x0D {           // CR (maybe CRLF)
+                if i + 1 < length, nsText.character(at: i + 1) == 0x0A {
+                    return i + 2
+                }
+                return i + 1
+            }
+            i += 1
+        }
+        return length
+    }
+
+    // MARK: - Classification
+
+    private static func classifyLine(lineRange: NSRange, state: inout ScannerState) {
+        let contentRange = trimTrailingNewline(lineRange, in: state.nsText)
+
+        // Blank line ends paragraph buffering.
+        if isBlankLine(contentRange, in: state.nsText) {
+            state.flushBufferedParagraph()
+            return
+        }
+
+        // ATX heading: ^#{1,6} + ' '
+        if let heading = atxHeading(lineRange: lineRange, contentRange: contentRange, in: state.nsText) {
+            state.flushBufferedParagraph()
+            state.blocks.append(heading)
+            return
+        }
+
+        // Default: buffer as paragraph line. Setext / other lookahead handled in later tasks.
+        state.appendParagraphLine(lineRange)
+    }
+
+    private static func trimTrailingNewline(_ range: NSRange, in nsText: NSString) -> NSRange {
+        var length = range.length
+        let end = range.location + range.length
+        if length >= 2,
+           nsText.character(at: end - 2) == 0x0D,
+           nsText.character(at: end - 1) == 0x0A {
+            length -= 2
+        } else if length >= 1 {
+            let last = nsText.character(at: end - 1)
+            if last == 0x0A || last == 0x0D { length -= 1 }
+        }
+        return NSRange(location: range.location, length: length)
+    }
+
+    private static func isBlankLine(_ range: NSRange, in nsText: NSString) -> Bool {
+        for i in range.location..<NSMaxRange(range) {
+            let c = nsText.character(at: i)
+            if c != 0x20 && c != 0x09 { return false }
+        }
+        return true
+    }
+
+    // MARK: ATX heading
+
+    /// Parses one line as an ATX heading: up to 3 leading spaces, 1...6 `#`, then space/tab or end of line.
+    /// Returns `nil` otherwise. Content runs to the end of `contentRange`; a closing `#` run is not stripped.
+    private static func atxHeading(lineRange: NSRange, contentRange: NSRange, in nsText: NSString) -> BlockSpan? {
+        // Up to 3 leading spaces allowed before #
+        var i = contentRange.location
+        let lineEnd = NSMaxRange(contentRange)
+        var leadingSpaces = 0
+        while i < lineEnd && leadingSpaces < 4 && nsText.character(at: i) == 0x20 {
+            i += 1
+            leadingSpaces += 1
+        }
+        if leadingSpaces >= 4 { return nil }
+
+        // Count hashes (1...6); the loop stops at 7 so an over-long run is rejected below
+        let hashStart = i
+        var hashCount = 0
+        while i < lineEnd && hashCount < 7 && nsText.character(at: i) == 0x23 {  // #
+            i += 1
+            hashCount += 1
+        }
+        guard hashCount >= 1, hashCount <= 6 else { return nil }
+
+        // Must be followed by space/tab or end of line
+        if i < lineEnd {
+            let next = nsText.character(at: i)
+            guard next == 0x20 || next == 0x09 else { return nil }
+        }
+
+        // Skip spaces between hashes and content
+        while i < lineEnd {
+            let c = nsText.character(at: i)
+            if c == 0x20 || c == 0x09 { i += 1 } else { break }
+        }
+        // `i` never passes `lineEnd`, so the length is non-negative (0 for a bare `#`).
+        let cRange = NSRange(location: i, length: lineEnd - i)
+        // Marker covers only the `#` run, not the leading spaces or the separator.
+        let hashRange = NSRange(location: hashStart, length: hashCount)
+
+        return BlockSpan(
+            kind: .heading(level: hashCount),
+            range: lineRange,
+            contentRange: cRange,
+            markerRanges: [hashRange]
+        )
+    }
+}
diff --git a/Tests/MarkdownEngineTests/BlockScannerTests.swift b/Tests/MarkdownEngineTests/BlockScannerTests.swift
new file mode 100644
index 0000000..e10dc5a
--- /dev/null
+++ b/Tests/MarkdownEngineTests/BlockScannerTests.swift
@@ -0,0 +1,100 @@
+//
+//  BlockScannerTests.swift
+//  MarkdownEngineTests
+//
+
+import Testing
+import Foundation
+@testable import MarkdownEngine
+
+@Suite("BlockScanner")
+struct BlockScannerTests {
+
+    // MARK: Paragraph
+
+    @Test func singleParagraph() {
+        let result = BlockScanner.scan("Hello, world.")
+        #expect(result.blocks.count == 1)
+        if let first = result.blocks.first {
+            #expect(first.kind == .paragraph)
+            #expect(first.range == NSRange(location: 0, length: 13))
+        }
+    }
+
+    @Test func twoParagraphsSeparatedByBlankLine() {
+        let text = "First.\n\nSecond."
+        let result = BlockScanner.scan(text)
+        #expect(result.blocks.count == 2)
+        #expect(result.blocks.allSatisfy { $0.kind == .paragraph })
+    }
+
+    @Test func paragraphSpanningMultipleSoftLines() {
+        let text = "Line one\nLine two\nLine three"
+        let result = BlockScanner.scan(text)
+        #expect(result.blocks.count == 1)
+        #expect(result.blocks.first?.kind == .paragraph)
+    }
+
+    @Test func emptyInputProducesNoBlocks() {
+        let result = BlockScanner.scan("")
+        #expect(result.blocks.isEmpty)
+    }
+
+    @Test func whitespaceOnlyInputProducesNoBlocks() {
+        let result = BlockScanner.scan("\n   \n\n")
+        #expect(result.blocks.isEmpty)
+    }
+
+    // MARK: ATX headings
+
+    @Test func atxHeadingLevel1() {
+        let result = BlockScanner.scan("# Title")
+        #expect(result.blocks.count == 1)
+        if case .heading(let level) = result.blocks.first?.kind {
+            #expect(level == 1)
+        } else {
+            Issue.record("Expected heading kind")
+        }
+    }
+
+    @Test func atxHeadingLevel6() {
+        let result = BlockScanner.scan("###### Title")
+        if case .heading(let level) = result.blocks.first?.kind {
+            #expect(level == 6)
+        } else {
+            Issue.record("Expected heading kind")
+        }
+    }
+
+    @Test func atxHeadingSevenHashesIsParagraph() {
+        // CommonMark: more than 6 # is not a heading.
+        let result = BlockScanner.scan("####### NotHeading")
+        #expect(result.blocks.first?.kind == .paragraph)
+    }
+
+    @Test func atxHeadingWithoutSpaceIsParagraph() {
+        // CommonMark: `#title` (no space) is a paragraph.
+        let result = BlockScanner.scan("#NotHeading")
+        #expect(result.blocks.first?.kind == .paragraph)
+    }
+
+    @Test func atxHeadingContentRangeExcludesHashAndSpace() {
+        let result = BlockScanner.scan("## Title")
+        let heading = result.blocks.first
+        #expect(heading?.contentRange == NSRange(location: 3, length: 5))
+    }
+
+    @Test func atxHeadingMarkerRangeCoversHashes() {
+        let result = BlockScanner.scan("### Title")
+        let heading = result.blocks.first
+        #expect(heading?.markerRanges.first == NSRange(location: 0, length: 3))
+    }
+
+    @Test func atxHeadingFollowedByParagraph() {
+        let text = "# Heading\n\nParagraph body"
+        let result = BlockScanner.scan(text)
+        #expect(result.blocks.count == 2)
+        if case .heading = result.blocks[0].kind { /* ok */ } else { Issue.record("first should be heading") }
+        #expect(result.blocks[1].kind == .paragraph)
+    }
+}

From e8aed47b304979f26c55f6a4d79d1a3c7b95272c Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 20:38:38 +0200
Subject: [PATCH 04/13] feat(parser): BlockScanner fenced code block support

---
 .../MarkdownEngine/Parser/BlockScanner.swift  | 196 ++++++++++++++++--
 .../BlockScannerTests.swift                   |  55 +++++
 2 files changed, 229 insertions(+), 22 deletions(-)

diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
index d89cd95..c7e6f0d 100644
--- a/Sources/MarkdownEngine/Parser/BlockScanner.swift
+++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
@@ -28,7 +28,41 @@ enum BlockScanner {
         while lineStart < length {
             let lineEnd = nextLineEnd(in: nsText, from: lineStart, length: length)
             let lineRange = NSRange(location: lineStart, length: lineEnd - lineStart)
-            classifyLine(lineRange: lineRange, state: &state)
+            let contentRange = trimTrailingNewline(lineRange, in: nsText)
+
+            // 1) Blank line ends paragraph buffering.
+            if isBlankLine(contentRange, in: nsText) {
+                state.flushBufferedParagraph()
+                lineStart = lineEnd
+                continue
+            }
+
+            // 2) Fenced code block (multi-line — consumes until closing fence).
+            if let opener = fencedCodeOpener(contentRange: contentRange, in: nsText) {
+                state.flushBufferedParagraph()
+                if let consumed = consumeFencedCode(
+                    opener: opener,
+                    openerLineRange: lineRange,
+                    nsText: nsText,
+                    length: length,
+                    state: &state
+                ) {
+                    lineStart = consumed
+                    continue
+                }
+                // Unclosed fence: fall through to paragraph treatment.
+            }
+
+            // 3) ATX heading (single line).
+            if let heading = atxHeading(lineRange: lineRange, contentRange: contentRange, in: nsText) {
+                state.flushBufferedParagraph()
+                state.blocks.append(heading)
+                lineStart = lineEnd
+                continue
+            }
+
+            // 4) Default: buffer as paragraph line.
+            state.appendParagraphLine(lineRange)
             lineStart = lineEnd
         }
 
@@ -84,27 +118,7 @@ enum BlockScanner {
         return length
     }
 
-    // MARK: - Classification
-
-    private static func classifyLine(lineRange: NSRange, state: inout ScannerState) {
-        let contentRange = trimTrailingNewline(lineRange, in: state.nsText)
-
-        // Blank line ends paragraph buffering.
-        if isBlankLine(contentRange, in: state.nsText) {
-            state.flushBufferedParagraph()
-            return
-        }
-
-        // ATX heading: ^#{1,6} + ' '
-        if let heading = atxHeading(lineRange: lineRange, contentRange: contentRange, in: state.nsText) {
-            state.flushBufferedParagraph()
-            state.blocks.append(heading)
-            return
-        }
-
-        // Default: buffer as paragraph line. Setext / other lookahead handled in later tasks.
-        state.appendParagraphLine(lineRange)
-    }
+    // MARK: - Classification helpers
 
     private static func trimTrailingNewline(_ range: NSRange, in nsText: NSString) -> NSRange {
         var length = range.length
@@ -175,4 +189,142 @@ enum BlockScanner {
             markerRanges: [hashRange]
         )
     }
+
+    // MARK: Fenced code
+
+    private struct FencedCodeOpener {
+        let fenceRange: NSRange
+        let fenceLength: Int
+        let fenceChar: UInt16   // ` or ~
+        let language: String?
+    }
+
+    /// Detects a fenced code block opener on `contentRange`: up to 3 leading spaces, then 3+ backticks
+    /// or 3+ tildes. `language` is the whole trimmed rest of the line (`nil` if empty), not just its first word.
+    private static func fencedCodeOpener(contentRange: NSRange, in nsText: NSString) -> FencedCodeOpener? {
+        let lineEnd = NSMaxRange(contentRange)
+        var i = contentRange.location
+        var leading = 0
+        while i < lineEnd, nsText.character(at: i) == 0x20, leading < 4 {
+            i += 1; leading += 1
+        }
+        if leading >= 4 { return nil }
+
+        guard i < lineEnd else { return nil }
+        let fenceChar = nsText.character(at: i)
+        guard fenceChar == 0x60 /* ` */ || fenceChar == 0x7E /* ~ */ else { return nil }
+
+        let fenceStart = i
+        var count = 0
+        while i < lineEnd, nsText.character(at: i) == fenceChar {
+            i += 1; count += 1
+        }
+        guard count >= 3 else { return nil }
+
+        // Backtick fences disallow ` anywhere on the opener line after the fence (tilde fences have no such limit).
+        if fenceChar == 0x60 {
+            var j = i
+            while j < lineEnd {
+                if nsText.character(at: j) == 0x60 { return nil }
+                j += 1
+            }
+        }
+
+        // Language tag: everything after the fence, with surrounding spaces/tabs trimmed.
+        var langStart = i
+        while langStart < lineEnd,
+              (nsText.character(at: langStart) == 0x20 || nsText.character(at: langStart) == 0x09) {
+            langStart += 1
+        }
+        var langEnd = lineEnd
+        while langEnd > langStart,
+              (nsText.character(at: langEnd - 1) == 0x20 || nsText.character(at: langEnd - 1) == 0x09) {
+            langEnd -= 1
+        }
+        let language: String?
+        if langStart < langEnd {
+            language = nsText.substring(with: NSRange(location: langStart, length: langEnd - langStart))
+        } else {
+            language = nil
+        }
+
+        return FencedCodeOpener(
+            fenceRange: NSRange(location: fenceStart, length: count),
+            fenceLength: count,
+            fenceChar: fenceChar,
+            language: language
+        )
+    }
+
+    /// Scans the lines after `openerLineRange` for a matching closing fence
+    /// (same char, at least as many). On success appends the `.fencedCode` block to `state` and returns
+    /// the index just past the closing-fence line; returns `nil` (nothing appended) if EOF comes first.
+    private static func consumeFencedCode(
+        opener: FencedCodeOpener,
+        openerLineRange: NSRange,
+        nsText: NSString,
+        length: Int,
+        state: inout ScannerState
+    ) -> Int? {
+        let contentStart = NSMaxRange(openerLineRange)
+        var cursor = contentStart
+        var closingFenceRange: NSRange? = nil
+        var blockEnd: Int = contentStart
+
+        while cursor < length {
+            let lineEnd = nextLineEnd(in: nsText, from: cursor, length: length)
+            let lineRange = NSRange(location: cursor, length: lineEnd - cursor)
+            let contentRange = trimTrailingNewline(lineRange, in: nsText)
+
+            if isClosingFence(contentRange: contentRange,
+                              opener: opener,
+                              in: nsText) {
+                closingFenceRange = NSRange(location: contentRange.location, length: contentRange.length)  // whole fence line minus newline, incl. any indentation / trailing blanks
+                blockEnd = lineEnd
+                cursor = lineEnd
+                break
+            }
+
+            cursor = lineEnd
+            blockEnd = lineEnd
+        }
+
+        guard let closingFence = closingFenceRange else {
+            return nil  // unclosed: caller falls back to treating the opener line as ordinary text
+        }
+
+        let blockRange = NSRange(location: openerLineRange.location, length: blockEnd - openerLineRange.location)
+        let codeContentRange = NSRange(location: contentStart, length: closingFence.location - contentStart)  // body lines only, including the newline before the closing fence
+
+        let block = BlockSpan(
+            kind: .fencedCode(language: opener.language),
+            range: blockRange,
+            contentRange: codeContentRange,
+            markerRanges: [opener.fenceRange, closingFence]
+        )
+        state.blocks.append(block)
+        return cursor
+    }
+
+    private static func isClosingFence(contentRange: NSRange, opener: FencedCodeOpener, in nsText: NSString) -> Bool {  // does this line close `opener`'s fence?
+        let lineEnd = NSMaxRange(contentRange)
+        var i = contentRange.location
+        var leading = 0
+        while i < lineEnd, nsText.character(at: i) == 0x20, leading < 4 {
+            i += 1; leading += 1
+        }
+        if leading >= 4 { return false }  // a closing fence may be indented by at most 3 spaces
+        var count = 0
+        while i < lineEnd, nsText.character(at: i) == opener.fenceChar {
+            i += 1; count += 1
+        }
+        guard count >= opener.fenceLength else { return false }  // same fence char, at least as long as the opener
+        // Only whitespace allowed after the closing fence.
+        while i < lineEnd {
+            let c = nsText.character(at: i)
+            if c != 0x20 && c != 0x09 { return false }
+            i += 1
+        }
+        return true
+    }
 }
diff --git a/Tests/MarkdownEngineTests/BlockScannerTests.swift b/Tests/MarkdownEngineTests/BlockScannerTests.swift
index e10dc5a..e5feb5a 100644
--- a/Tests/MarkdownEngineTests/BlockScannerTests.swift
+++ b/Tests/MarkdownEngineTests/BlockScannerTests.swift
@@ -97,4 +97,59 @@ struct BlockScannerTests {
         if case .heading = result.blocks[0].kind { /* ok */ } else { Issue.record("first should be heading") }
         #expect(result.blocks[1].kind == .paragraph)
     }
+
+    // MARK: Fenced code
+
+    @Test func fencedCodeBlockNoLanguage() {
+        let text = "```\nlet x = 1\n```"
+        let result = BlockScanner.scan(text)
+        #expect(result.blocks.count == 1)
+        if case .fencedCode(let lang) = result.blocks.first?.kind {
+            #expect(lang == nil)
+        } else {
+            Issue.record("Expected fencedCode kind")
+        }
+    }
+
+    @Test func fencedCodeBlockWithLanguage() {
+        let text = "```swift\nlet x = 1\n```"
+        let result = BlockScanner.scan(text)
+        if case .fencedCode(let lang) = result.blocks.first?.kind {
+            #expect(lang == "swift")
+        } else {
+            Issue.record("Expected fencedCode kind")
+        }
+    }
+
+    @Test func fencedCodeContentRangeCoversOnlyBody() throws {
+        let text = "```\nbody\n```"
+        let result = BlockScanner.scan(text)
+        let block = try #require(result.blocks.first)   // record a failure instead of trapping when no block is produced
+        let body = (text as NSString).substring(with: block.contentRange)
+        #expect(body == "body\n")   // body only: neither fence line is part of contentRange
+    }
+
+    @Test func fencedCodeBlockMarkerRangesCoverBothFences() {
+        let text = "```\nbody\n```"
+        let result = BlockScanner.scan(text)
+        #expect(result.blocks.first?.markerRanges.count == 2)
+    }
+
+    @Test func emphasisLikeContentInsideFencedCodeIsIgnoredByBlockKind() throws {
+        // Block scanner is responsible for marking content as "not inline" —
+        // the pipeline filter is exercised in the integration tests.
+        let text = "```\n**not bold**\n```"
+        let result = BlockScanner.scan(text)
+        let block = try #require(result.blocks.first)   // record a failure instead of trapping when no block is produced
+        #expect(!block.kind.allowsInlineContent)
+    }
+
+    @Test func unclosedFencedCodeBlockFallsBackToParagraph() {
+        // No closing fence => current parseTokens treats it as plain text.
+        // Block scanner falls back to a single paragraph spanning the opening
+        // fence through the rest of the input.
+        let text = "```swift\nlet x = 1"
+        let result = BlockScanner.scan(text)
+        #expect(result.blocks.allSatisfy { $0.kind == .paragraph })
+    }
 }

From 450f2256567c94a3d2f53ab639aa1851e577464f Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 20:45:38 +0200
Subject: [PATCH 05/13] refactor(parser): symmetric fence marker ranges, drop
 redundant fenceLength

---
 .../MarkdownEngine/Parser/BlockScanner.swift  | 31 ++++++++++---------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
index c7e6f0d..9a79417 100644
--- a/Sources/MarkdownEngine/Parser/BlockScanner.swift
+++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
@@ -194,7 +194,6 @@ enum BlockScanner {
 
     private struct FencedCodeOpener {
         let fenceRange: NSRange
-        let fenceLength: Int
         let fenceChar: UInt16   // ` or ~
         let language: String?
     }
@@ -250,7 +249,6 @@ enum BlockScanner {
 
         return FencedCodeOpener(
             fenceRange: NSRange(location: fenceStart, length: count),
-            fenceLength: count,
             fenceChar: fenceChar,
             language: language
         )
@@ -268,18 +266,17 @@ enum BlockScanner {
     ) -> Int? {
         let contentStart = NSMaxRange(openerLineRange)
         var cursor = contentStart
-        var closingFenceRange: NSRange? = nil
+        var closingFenceRangeStorage: NSRange? = nil
         var blockEnd: Int = contentStart
 
         while cursor < length {
             let lineEnd = nextLineEnd(in: nsText, from: cursor, length: length)
-            let lineRange = NSRange(location: cursor, length: lineEnd - cursor)
-            let contentRange = trimTrailingNewline(lineRange, in: nsText)
+            let contentRange = trimTrailingNewline(NSRange(location: cursor, length: lineEnd - cursor), in: nsText)
 
-            if isClosingFence(contentRange: contentRange,
-                              opener: opener,
-                              in: nsText) {
-                closingFenceRange = NSRange(location: contentRange.location, length: contentRange.length)
+            if let closer = closingFenceRange(contentRange: contentRange,
+                                              opener: opener,
+                                              in: nsText) {
+                closingFenceRangeStorage = closer
                 blockEnd = lineEnd
                 cursor = lineEnd
                 break
@@ -289,7 +286,7 @@ enum BlockScanner {
             blockEnd = lineEnd
         }
 
-        guard let closingFence = closingFenceRange else {
+        guard let closingFence = closingFenceRangeStorage else {
             return nil  // unclosed
         }
 
@@ -306,25 +303,29 @@ enum BlockScanner {
         return cursor
     }
 
-    private static func isClosingFence(contentRange: NSRange, opener: FencedCodeOpener, in nsText: NSString) -> Bool {
+    /// If `contentRange` is a closing fence for `opener`, returns the range of
+    /// the fence characters themselves (not including leading/trailing whitespace).
+    /// Otherwise returns nil.
+    private static func closingFenceRange(contentRange: NSRange, opener: FencedCodeOpener, in nsText: NSString) -> NSRange? {
         let lineEnd = NSMaxRange(contentRange)
         var i = contentRange.location
         var leading = 0
         while i < lineEnd, nsText.character(at: i) == 0x20, leading < 4 {
             i += 1; leading += 1
         }
-        if leading >= 4 { return false }
+        if leading >= 4 { return nil }
+        let fenceStart = i
         var count = 0
         while i < lineEnd, nsText.character(at: i) == opener.fenceChar {
             i += 1; count += 1
         }
-        guard count >= opener.fenceLength else { return false }
+        guard count >= opener.fenceRange.length else { return nil }
         // Only whitespace allowed after the closing fence.
         while i < lineEnd {
             let c = nsText.character(at: i)
-            if c != 0x20 && c != 0x09 { return false }
+            if c != 0x20 && c != 0x09 { return nil }
             i += 1
         }
-        return true
+        return NSRange(location: fenceStart, length: count)
     }
 }

From 70243b8b3d2278631db163a7c7d1cbed4a57564d Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 20:47:49 +0200
Subject: [PATCH 06/13] feat(parser): BlockScanner Setext heading lookahead

---
 .../MarkdownEngine/Parser/BlockScanner.swift  | 56 +++++++++++++++++++
 .../BlockScannerTests.swift                   | 40 +++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
index 9a79417..2b946ed 100644
--- a/Sources/MarkdownEngine/Parser/BlockScanner.swift
+++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
@@ -61,6 +61,16 @@ enum BlockScanner {
                 continue
             }
 
+            // Setext underline rewrites buffered paragraph into a heading.
+            if !state.paragraphBuffer.isEmpty,
+               let level = setextUnderlineLevel(contentRange: contentRange, in: nsText) {
+                state.rewriteBufferAsHeading(level: level,
+                                             underlineLineRange: lineRange,
+                                             underlineContentRange: contentRange)
+                lineStart = lineEnd
+                continue
+            }
+
             // 4) Default: buffer as paragraph line.
             state.appendParagraphLine(lineRange)
             lineStart = lineEnd
@@ -95,6 +105,23 @@ enum BlockScanner {
             ))
             paragraphBuffer.removeAll(keepingCapacity: true)
         }
+
+        mutating func rewriteBufferAsHeading(level: Int,
+                                             underlineLineRange: NSRange,
+                                             underlineContentRange: NSRange) {
+            guard let first = paragraphBuffer.first, let last = paragraphBuffer.last else { return }
+            let bufferRange = NSRange(location: first.location,
+                                      length: NSMaxRange(last) - first.location)
+            let fullRange = NSRange(location: bufferRange.location,
+                                    length: NSMaxRange(underlineLineRange) - bufferRange.location)
+            blocks.append(BlockSpan(
+                kind: .heading(level: level),
+                range: fullRange,
+                contentRange: bufferRange,
+                markerRanges: [underlineContentRange]
+            ))
+            paragraphBuffer.removeAll(keepingCapacity: true)
+        }
     }
 
     // MARK: - Line iteration
@@ -190,6 +217,35 @@ enum BlockScanner {
         )
     }
 
+    // MARK: Setext
+
+    /// Returns 1 for `===…`, 2 for `---…`, nil otherwise. CommonMark allows
+    /// up to 3 leading spaces and any trailing whitespace; the run itself must be one repeated character.
+    private static func setextUnderlineLevel(contentRange: NSRange, in nsText: NSString) -> Int? {
+        let lineEnd = NSMaxRange(contentRange)
+        var i = contentRange.location
+        var leading = 0
+        while i < lineEnd, nsText.character(at: i) == 0x20, leading < 4 {   // count leading spaces, stopping at 4
+            i += 1; leading += 1
+        }
+        if leading >= 4 { return nil }   // 4 or more leading spaces: not an underline
+        guard i < lineEnd else { return nil }   // empty or spaces-only line
+        let ch = nsText.character(at: i)
+        guard ch == 0x3D /* = */ || ch == 0x2D /* - */ else { return nil }
+        var count = 0
+        while i < lineEnd, nsText.character(at: i) == ch {
+            i += 1; count += 1
+        }
+        guard count >= 1 else { return nil }   // NOTE(review): cannot fail — the character at `i` already equalled `ch`
+        // Only trailing whitespace allowed.
+        while i < lineEnd {
+            let c = nsText.character(at: i)
+            if c != 0x20 && c != 0x09 { return nil }   // anything but space/tab after the run disqualifies the line
+            i += 1
+        }
+        return ch == 0x3D ? 1 : 2
+    }
+
     // MARK: Fenced code
 
     private struct FencedCodeOpener {
diff --git a/Tests/MarkdownEngineTests/BlockScannerTests.swift b/Tests/MarkdownEngineTests/BlockScannerTests.swift
index e5feb5a..e8e2571 100644
--- a/Tests/MarkdownEngineTests/BlockScannerTests.swift
+++ b/Tests/MarkdownEngineTests/BlockScannerTests.swift
@@ -152,4 +152,44 @@ struct BlockScannerTests {
         let result = BlockScanner.scan(text)
         #expect(result.blocks.allSatisfy { $0.kind == .paragraph })
     }
+
+    // MARK: Setext heading
+
+    @Test func setextH1WithEqualsUnderline() {
+        let text = "Title\n====="
+        let result = BlockScanner.scan(text)
+        #expect(result.blocks.count == 1)
+        if case .heading(let level) = result.blocks.first?.kind {
+            #expect(level == 1)
+        } else {
+            Issue.record("Expected heading kind")
+        }
+    }
+
+    @Test func setextH2WithDashUnderline() {
+        let text = "Title\n-----"
+        let result = BlockScanner.scan(text)
+        if case .heading(let level) = result.blocks.first?.kind {
+            #expect(level == 2)
+        } else {
+            Issue.record("Expected heading kind")
+        }
+    }
+
+    @Test func setextSpansMultipleParagraphLines() {
+        let text = "Line one\nLine two\n==="
+        let result = BlockScanner.scan(text)
+        #expect(result.blocks.count == 1)
+        if case .heading = result.blocks.first?.kind { /* ok */ } else { Issue.record("Expected heading") }
+    }
+
+    @Test func dashesAloneWithoutParagraphAreNotConsumedAsHeading() {
+        // Without a preceding paragraph, `---` does not become a heading via Setext.
+        // (Thematic-break recognition arrives in Task 6.)
+        let text = "\n---"
+        let result = BlockScanner.scan(text)
+        #expect(!result.blocks.contains(where: {
+            if case .heading = $0.kind { return true } else { return false }
+        }))
+    }
 }

From ac367acb3f48ec4d69539b7ecbd05671a3a13973 Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 20:51:16 +0200
Subject: [PATCH 07/13] feat(parser): BlockScanner thematic break + link
 reference definitions

---
 .../MarkdownEngine/Parser/BlockScanner.swift  | 98 +++++++++++++++++++
 .../BlockScannerTests.swift                   | 60 ++++++++++++
 2 files changed, 158 insertions(+)

diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
index 2b946ed..471098f 100644
--- a/Sources/MarkdownEngine/Parser/BlockScanner.swift
+++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
@@ -71,6 +71,37 @@ enum BlockScanner {
                 continue
             }
 
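+            // Thematic break — only tried when no paragraph is being buffered (a `---` line after a
+            // paragraph is claimed by Setext above). NOTE(review): `***`, `___` and `- - -` after a paragraph appear to fall through to the paragraph buffer — confirm intended.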
+            if state.paragraphBuffer.isEmpty,
+               isThematicBreak(contentRange: contentRange, in: nsText) {
+                state.blocks.append(BlockSpan(
+                    kind: .thematicBreak,
+                    range: lineRange,
+                    contentRange: lineRange,
+                    markerRanges: [contentRange]
+                ))
+                lineStart = lineEnd
+                continue
+            }
+
+            // Link reference definition.
+            if state.paragraphBuffer.isEmpty,
+               let def = linkReferenceDefinition(contentRange: contentRange, in: nsText) {
+                let key = def.reference.normalizedLabel
+                if state.linkReferences[key] == nil {
+                    state.linkReferences[key] = def.reference
+                }
+                state.blocks.append(BlockSpan(
+                    kind: .linkReferenceDefinition(label: def.reference.label),
+                    range: lineRange,
+                    contentRange: NSRange(location: def.urlRange.location, length: def.urlRange.length),
+                    markerRanges: [def.labelRange]
+                ))
+                lineStart = lineEnd
+                continue
+            }
+
             // 4) Default: buffer as paragraph line.
             state.appendParagraphLine(lineRange)
             lineStart = lineEnd
@@ -384,4 +415,71 @@ enum BlockScanner {
         }
         return NSRange(location: fenceStart, length: count)
     }
+
+    // MARK: Thematic break
+
+    private static func isThematicBreak(contentRange: NSRange, in nsText: NSString) -> Bool {
+        let lineEnd = NSMaxRange(contentRange)
+        var i = contentRange.location
+        var leading = 0
+        while i < lineEnd, nsText.character(at: i) == 0x20, leading < 4 {
+            i += 1; leading += 1
+        }
+        if leading >= 4 { return false }
+        guard i < lineEnd else { return false }
+        let marker = nsText.character(at: i)
+        guard marker == 0x2D /* - */ || marker == 0x5F /* _ */ || marker == 0x2A /* * */ else { return false }
+        var count = 0
+        while i < lineEnd {
+            let c = nsText.character(at: i)
+            if c == marker { count += 1; i += 1; continue }
+            if c == 0x20 || c == 0x09 { i += 1; continue }
+            return false
+        }
+        return count >= 3
+    }
+
+    // MARK: Link reference definitions
+
+    private struct LinkRefDefHit {   // result of matching one `[label]: url "title"` line
+        let reference: LinkReference   // built from the matched label, url and optional title
+        let labelRange: NSRange   // label text only (regex group 1); excludes the surrounding `[…]:`
+        let urlRange: NSRange   // url text (regex group 2)
+    }
+
+    private static let linkRefDefRegex: NSRegularExpression = {
+        // ^ [ ]{0,3} \[ label(1) \] : [ \t]* url(2) ( [ \t]+ ( "title"(3) | 'title'(4) | (title)(5) ) )? [ \t]* $
+        let pattern = #"^[ ]{0,3}\[([^\[\]\r\n]+)\]:[ \t]*([^\s]+)(?:[ \t]+(?:"([^"\r\n]*)"|'([^'\r\n]*)'|\(([^)\r\n]*)\)))?[ \t]*$"#
+        return try! NSRegularExpression(pattern: pattern, options: [])
+    }()
+
+    private static func linkReferenceDefinition(contentRange: NSRange, in nsText: NSString) -> LinkRefDefHit? {
+        let match = linkRefDefRegex.firstMatch(
+            in: nsText as String,
+            options: [],
+            range: contentRange
+        )
+        guard let m = match, m.range == contentRange else { return nil }
+
+        let labelRange = m.range(at: 1)
+        let urlRange = m.range(at: 2)
+        guard labelRange.location != NSNotFound, urlRange.location != NSNotFound else { return nil }
+        let label = nsText.substring(with: labelRange)
+        let url = nsText.substring(with: urlRange)
+
+        var title: String? = nil
+        for groupIdx in 3...5 {
+            let r = m.range(at: groupIdx)
+            if r.location != NSNotFound {
+                title = nsText.substring(with: r)
+                break
+            }
+        }
+
+        return LinkRefDefHit(
+            reference: LinkReference(label: label, url: url, title: title),
+            labelRange: labelRange,
+            urlRange: urlRange
+        )
+    }
 }
diff --git a/Tests/MarkdownEngineTests/BlockScannerTests.swift b/Tests/MarkdownEngineTests/BlockScannerTests.swift
index e8e2571..1089b06 100644
--- a/Tests/MarkdownEngineTests/BlockScannerTests.swift
+++ b/Tests/MarkdownEngineTests/BlockScannerTests.swift
@@ -192,4 +192,64 @@ struct BlockScannerTests {
             if case .heading = $0.kind { return true } else { return false }
         }))
     }
+
+    // MARK: Thematic break
+
+    @Test func thematicBreakWithDashes() {
+        let result = BlockScanner.scan("---")
+        #expect(result.blocks.first?.kind == .thematicBreak)
+    }
+
+    @Test func thematicBreakWithAsterisks() {
+        let result = BlockScanner.scan("***")
+        #expect(result.blocks.first?.kind == .thematicBreak)
+    }
+
+    @Test func thematicBreakWithUnderscores() {
+        let result = BlockScanner.scan("___")
+        #expect(result.blocks.first?.kind == .thematicBreak)
+    }
+
+    @Test func thematicBreakDoesNotConsumeFollowingParagraph() {
+        let result = BlockScanner.scan("---\n\nbody")
+        #expect(result.blocks.count == 2)
+        #expect(result.blocks[0].kind == .thematicBreak)
+        #expect(result.blocks[1].kind == .paragraph)
+    }
+
+    @Test func dashUnderlineAfterParagraphPrefersSetext() {
+        // Setext H2 must win over thematic break when there's a buffered paragraph.
+        let result = BlockScanner.scan("Title\n---")
+        if case .heading(let lvl) = result.blocks.first?.kind {
+            #expect(lvl == 2)
+        } else {
+            Issue.record("Expected Setext H2")
+        }
+    }
+
+    // MARK: Link reference definitions
+
+    @Test func linkReferenceDefinitionBasic() {
+        let text = "[foo]: https://example.com"
+        let result = BlockScanner.scan(text)
+        #expect(result.linkReferences["foo"]?.url == "https://example.com")
+    }
+
+    @Test func linkReferenceDefinitionWithTitle() {
+        let text = "[foo]: https://example.com \"Example\""
+        let result = BlockScanner.scan(text)
+        #expect(result.linkReferences["foo"]?.title == "Example")
+    }
+
+    @Test func linkReferenceDefinitionCaseInsensitiveLabel() {
+        let text = "[FOO Bar]: https://example.com"
+        let result = BlockScanner.scan(text)
+        #expect(result.linkReferences["foo bar"] != nil)
+    }
+
+    @Test func duplicateLinkReferenceFirstWins() {
+        let text = "[foo]: https://first.com\n[foo]: https://second.com"
+        let result = BlockScanner.scan(text)
+        #expect(result.linkReferences["foo"]?.url == "https://first.com")
+    }
 }

From f3ba6177ee96f4ab75e8e739ac89b7a0f47ce213 Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 20:57:52 +0200
Subject: [PATCH 08/13] =?UTF-8?q?feat(parser):=20two-phase=20pipeline=20?=
 =?UTF-8?q?=E2=80=94=20block=20scanner=20drives=20parseTokens;=20inline=20?=
 =?UTF-8?q?tokens=20filtered=20by=20block=20precedence?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Parser/MarkdownTokenizer.swift            | 219 +++++++++++-------
 .../ParseTokensGoldenTests.swift              |  39 ++++
 2 files changed, 174 insertions(+), 84 deletions(-)

diff --git a/Sources/MarkdownEngine/Parser/MarkdownTokenizer.swift b/Sources/MarkdownEngine/Parser/MarkdownTokenizer.swift
index 4f2ed6f..668c9e7 100644
--- a/Sources/MarkdownEngine/Parser/MarkdownTokenizer.swift
+++ b/Sources/MarkdownEngine/Parser/MarkdownTokenizer.swift
@@ -53,40 +53,77 @@ enum MarkdownTokenizer {
         var tokens: [MarkdownToken] = []
         let nsText = text as NSString
         let fullRange = NSRange(location: 0, length: nsText.length)
+        guard nsText.length > 0 else { return [] }
 
-        // Emphasis via stack parser.
-        tokens.append(contentsOf: parseEmphasisTokens(in: text))
+        // ---------- Block phase ----------
+        let blockResult = BlockScanner.scan(text)
 
-        // Image embeds ![[Name]] (must be parsed before wikiLinks)
+        // Convert block spans into block-kind MarkdownTokens that the styler
+        // already understands. (Headings, fenced code; thematic breaks and
+        // link reference definitions don't have legacy MarkdownTokenKind
+        // counterparts and are tracked only via BlockScanResult for now.)
+        //
+        // BlockScanner emits ranges over whole lines (including trailing
+        // newlines) — the legacy regex-based parser excluded the trailing
+        // newline from `.heading` / `.codeBlock` token ranges, so we trim it
+        // here to keep the golden snapshot stable.
+        for span in blockResult.blocks {
+            switch span.kind {
+            case .heading:
+                tokens.append(MarkdownToken(
+                    kind: .heading,
+                    range: trimTrailingNewline(span.range, in: nsText),
+                    contentRange: span.contentRange,
+                    markerRanges: span.markerRanges
+                ))
+            case .fencedCode:
+                tokens.append(MarkdownToken(
+                    kind: .codeBlock,
+                    range: trimTrailingNewline(span.range, in: nsText),
+                    contentRange: span.contentRange,
+                    markerRanges: span.markerRanges
+                ))
+            default:
+                break
+            }
+        }
+
+        // ---------- Inline phase ----------
+        var inlineTokens: [MarkdownToken] = []
+
+        // Emphasis (stack parser, already line-scoped).
+        inlineTokens.append(contentsOf: parseEmphasisTokens(in: text))
+
+        // Image embeds ![[...]] (parsed before wiki-links).
         var imageEmbedRanges: [NSRange] = []
         for match in imageEmbedRegex.matches(in: text, options: [], range: fullRange) {
             let full = match.range(at: 0)
             let content = match.range(at: 1)
-            let openMarker = NSRange(location: full.location, length: 3) // ![[
-            let closeMarker = NSRange(location: full.location + full.length - 2, length: 2) // ]]
-            tokens.append(MarkdownToken(kind: .imageEmbed,
-                                        range: full,
-                                        contentRange: content,
-                                        markerRanges: [openMarker, closeMarker]))
+            let openMarker = NSRange(location: full.location, length: 3)
+            let closeMarker = NSRange(location: full.location + full.length - 2, length: 2)
+            inlineTokens.append(MarkdownToken(kind: .imageEmbed,
+                                              range: full,
+                                              contentRange: content,
+                                              markerRanges: [openMarker, closeMarker]))
             imageEmbedRanges.append(full)
         }
 
-        // Node links [[Name]]
+        // Wiki-links [[...]]
         for match in wikiLinkRegex.matches(in: text, options: [], range: fullRange) {
             let full = match.range(at: 0)
-            // Skip ranges already claimed by imageEmbed tokens
-            let overlapsImage = imageEmbedRanges.contains { NSIntersectionRange($0, full).length > 0 }
-            if overlapsImage { continue }
+            if imageEmbedRanges.contains(where: { NSIntersectionRange($0, full).length > 0 }) {
+                continue
+            }
             let content = match.range(at: 1)
             let open = NSRange(location: full.location, length: 2)
             let close = NSRange(location: full.location + full.length - 2, length: 2)
-            tokens.append(MarkdownToken(kind: .wikiLink,
-                                        range: full,
-                                        contentRange: content,
-                                        markerRanges: [open, close]))
+            inlineTokens.append(MarkdownToken(kind: .wikiLink,
+                                              range: full,
+                                              contentRange: content,
+                                              markerRanges: [open, close]))
         }
 
-        // Markdown links [Text](URL)
+        // Markdown links [text](url)
         for match in markdownLinkRegex.matches(in: text, options: [], range: fullRange) {
             let full = match.range
             let textRange = match.range(at: 1)
@@ -95,100 +132,114 @@ enum MarkdownTokenizer {
             let closeBracket = NSRange(location: textRange.location + textRange.length, length: 1)
             let openParen = NSRange(location: urlRange.location - 1, length: 1)
             let closeParen = NSRange(location: urlRange.location + urlRange.length, length: 1)
-            tokens.append(MarkdownToken(kind: .link,
-                                        range: full,
-                                        contentRange: textRange,
-                                        markerRanges: [openBracket, closeBracket, openParen, closeParen]))
+            inlineTokens.append(MarkdownToken(kind: .link,
+                                              range: full,
+                                              contentRange: textRange,
+                                              markerRanges: [openBracket, closeBracket, openParen, closeParen]))
         }
 
-        // Headings #... up to ######
-        for match in headingRegex.matches(in: text, options: [], range: fullRange) {
-            let fullMatchRange = match.range(at: 0)
-            let hashes = match.range(at: 1)
-            let content = match.range(at: 2)
-            let leadingWsLength = hashes.location - fullMatchRange.location
-            let tokenRange = NSRange(location: hashes.location, length: fullMatchRange.length - leadingWsLength)
-            var markerRanges = [hashes]
-            let hashEnd = hashes.location + hashes.length
-            if hashEnd < nsText.length {
-                let spaceRange = NSRange(location: hashEnd, length: 1)
-                if nsText.substring(with: spaceRange) == " " {
-                    markerRanges.append(spaceRange)
-                }
-            }
-            tokens.append(MarkdownToken(kind: .heading,
-                                        range: tokenRange,
-                                        contentRange: content,
-                                        markerRanges: markerRanges))
-        }
-
-        // Fenced code blocks ```lang\n...\n```
-        for match in codeBlockRegex.matches(in: text, options: [], range: fullRange) {
-            let full = match.range(at: 0)
-            let contentRange = match.range(at: 2)
-            let closingFence = match.range(at: 3)
-            let tokenEnd = closingFence.location + closingFence.length
-            let tokenRange = NSRange(location: full.location, length: tokenEnd - full.location)
-            let openingLength = max(3, min(contentRange.location - tokenRange.location, tokenRange.length))
-            let openingMarker = NSRange(location: tokenRange.location, length: openingLength)
-            _ = contentRange.location + contentRange.length
-            let closingMarker = closingFence
-            
-            tokens.append(MarkdownToken(kind: .codeBlock,
-                                        range: tokenRange,
-                                        contentRange: contentRange,
-                                        markerRanges: [openingMarker, closingMarker]))
-        }
-        
-        // Block LaTeX $$...$$ (multiline)
+        // Block LaTeX $$...$$ — runs only against ranges outside fenced code.
         for match in blockLatexRegex.matches(in: text, options: [], range: fullRange) {
             let full = match.range(at: 0)
-            let inCode = tokens.contains { $0.kind == .codeBlock && NSIntersectionRange($0.range, full).length > 0 }
-            if inCode { continue }
-            
+            if isInsideFencedCode(range: full, blocks: blockResult.blocks) { continue }
             let content = match.range(at: 1)
             let openMarker = NSRange(location: full.location, length: 2)
             let closeMarker = NSRange(location: full.location + full.length - 2, length: 2)
-            tokens.append(MarkdownToken(kind: .blockLatex,
-                                        range: full,
-                                        contentRange: content,
-                                        markerRanges: [openMarker, closeMarker]))
+            inlineTokens.append(MarkdownToken(kind: .blockLatex,
+                                              range: full,
+                                              contentRange: content,
+                                              markerRanges: [openMarker, closeMarker]))
         }
 
-        // Inline code `code`
+        // Inline code `…`
         for match in inlineCodeRegex.matches(in: text, options: [], range: fullRange) {
             let full = match.range(at: 0)
             let content = match.range(at: 1)
             let openBacktick = NSRange(location: full.location, length: 1)
             let closeBacktick = NSRange(location: full.location + full.length - 1, length: 1)
-            tokens.append(MarkdownToken(kind: .inlineCode,
-                                        range: full,
-                                        contentRange: content,
-                                        markerRanges: [openBacktick, closeBacktick]))
+            inlineTokens.append(MarkdownToken(kind: .inlineCode,
+                                              range: full,
+                                              contentRange: content,
+                                              markerRanges: [openBacktick, closeBacktick]))
         }
 
-        // Inline LaTeX $formula$
+        // Inline LaTeX $…$
         for match in inlineLatexRegex.matches(in: text, options: [], range: fullRange) {
             let full = match.range(at: 0)
             let content = match.range(at: 1)
-            let isInsideBlock = tokens.contains {
-                ($0.kind == .codeBlock || $0.kind == .blockLatex) &&
-                NSIntersectionRange($0.range, full).length > 0
-            }
-            if isInsideBlock { continue }
+            if isInsideFencedCode(range: full, blocks: blockResult.blocks) { continue }
+            if isInsideBlockLatexInline(range: full, inlineTokens: inlineTokens) { continue }
             let contentString = nsText.substring(with: content)
             if !isInlineMathContent(contentString) { continue }
             let openDollar = NSRange(location: full.location, length: 1)
             let closeDollar = NSRange(location: full.location + full.length - 1, length: 1)
-            tokens.append(MarkdownToken(kind: .inlineLatex,
-                                        range: full,
-                                        contentRange: content,
-                                        markerRanges: [openDollar, closeDollar]))
+            inlineTokens.append(MarkdownToken(kind: .inlineLatex,
+                                              range: full,
+                                              contentRange: content,
+                                              markerRanges: [openDollar, closeDollar]))
+        }
+
+        // ---------- Block-precedence filter ----------
+        let allowedInline = inlineContainerRanges(from: blockResult.blocks)
+        for t in inlineTokens {
+            if rangeIsInside(t.range, anyOf: allowedInline) {
+                tokens.append(t)
+            }
         }
 
         return tokens
     }
 
+    // MARK: - Helpers used by parseTokens
+
+    /// Content ranges of all blocks that allow inline tokenization.
+    private static func inlineContainerRanges(from blocks: [BlockSpan]) -> [NSRange] {
+        blocks.compactMap { $0.kind.allowsInlineContent ? $0.contentRange : nil }
+    }
+
+    /// True when `range` is fully contained in any one of the allowed ranges.
+    private static func rangeIsInside(_ range: NSRange, anyOf allowed: [NSRange]) -> Bool {
+        if allowed.isEmpty { return false }
+        let end = NSMaxRange(range)
+        for a in allowed {
+            if range.location >= a.location && end <= NSMaxRange(a) {
+                return true
+            }
+        }
+        return false
+    }
+
+    private static func isInsideFencedCode(range: NSRange, blocks: [BlockSpan]) -> Bool {
+        for b in blocks {
+            if case .fencedCode = b.kind, NSIntersectionRange(b.range, range).length > 0 {
+                return true
+            }
+        }
+        return false
+    }
+
+    private static func isInsideBlockLatexInline(range: NSRange, inlineTokens: [MarkdownToken]) -> Bool {
+        for t in inlineTokens where t.kind == .blockLatex {
+            if NSIntersectionRange(t.range, range).length > 0 { return true }
+        }
+        return false
+    }
+
+    /// Trim a single trailing CR, LF, or CRLF from `range` (relative to `nsText`).
+    private static func trimTrailingNewline(_ range: NSRange, in nsText: NSString) -> NSRange {
+        var length = range.length
+        let end = range.location + length
+        if length >= 2,
+           nsText.character(at: end - 2) == 0x0D,
+           nsText.character(at: end - 1) == 0x0A {
+            length -= 2
+        } else if length >= 1 {
+            let last = nsText.character(at: end - 1)
+            if last == 0x0A || last == 0x0D { length -= 1 }
+        }
+        return NSRange(location: range.location, length: length)
+    }
+
     // MARK: - Code Block Helpers
 
     static func extractLanguage(from token: MarkdownToken, in text: String) -> String? {
diff --git a/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift b/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
index bd8e949..2af83c8 100644
--- a/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
+++ b/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
@@ -144,4 +144,43 @@ struct ParseTokensGoldenTests {
         let tokens = MarkdownTokenizer.parseTokens(in: text)
         #expect(tokens.filter { $0.kind == .codeBlock }.isEmpty)
     }
+
+    // MARK: Phase-1 integration regressions
+
+    @Test func parseTokensInternallyUsesBlockScanner() {
+        // After Phase 1, parseTokens still returns flat MarkdownToken array
+        // but produces .heading / .codeBlock tokens via the block scanner.
+        let text = "# Title\n\n```swift\nlet x = 1\n```\n\nBody **bold**."
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        #expect(tokens.contains { $0.kind == .heading })
+        #expect(tokens.contains { $0.kind == .codeBlock })
+        #expect(tokens.contains { $0.kind == .bold })
+    }
+
+    @Test func wikiLinkInsideFencedCodeIsNotEmittedAfterRefactor() {
+        let text = "```\n[[NotALink]]\n```"
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let wiki = tokens.filter { $0.kind == .wikiLink }
+        #expect(wiki.isEmpty)
+    }
+
+    @Test func imageEmbedInsideFencedCodeIsNotEmittedAfterRefactor() {
+        let text = "```\n![[picture.png]]\n```"
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let img = tokens.filter { $0.kind == .imageEmbed }
+        #expect(img.isEmpty)
+    }
+
+    @Test func inlineCodeInsideFencedCodeIsNotEmittedAfterRefactor() {
+        let text = "```\nlet a = `b`\n```"
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        let inlineCode = tokens.filter { $0.kind == .inlineCode }
+        #expect(inlineCode.isEmpty)
+    }
+
+    @Test func emphasisInsideFencedCodeIsNotEmittedAfterRefactor() {
+        let text = "```\n**bold-looking**\n```"
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        #expect(tokens.filter { $0.kind == .bold }.isEmpty)
+    }
 }

From 6690ebca876f571c675da9721bbaa73994f631b2 Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 21:01:46 +0200
Subject: [PATCH 09/13] feat(parser): BlockVisitor protocol with default
 depth-first walk

---
 .../MarkdownEngine/Parser/BlockVisitor.swift  | 30 ++++++++++++
 .../BlockVisitorTests.swift                   | 47 +++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 Sources/MarkdownEngine/Parser/BlockVisitor.swift
 create mode 100644 Tests/MarkdownEngineTests/BlockVisitorTests.swift

diff --git a/Sources/MarkdownEngine/Parser/BlockVisitor.swift b/Sources/MarkdownEngine/Parser/BlockVisitor.swift
new file mode 100644
index 0000000..a3cfe5a
--- /dev/null
+++ b/Sources/MarkdownEngine/Parser/BlockVisitor.swift
@@ -0,0 +1,30 @@
+//
+//  BlockVisitor.swift
+//  MarkdownEngine
+//
+//  Forward-facing API for renderers / stylers / consumers that need to walk
+//  block structure. Phase-1 spans are always flat (children empty), but the
+//  default `walk` implementation already recurses so Phase 2's nested blocks
+//  (blockquotes, list items, table cells) work without changes to callers.
+//
+//  Conform to `BlockVisitor` and implement `visit(_:depth:)`; call `walk(_:)`
+//  with the top-level block list.
+//
+
+import Foundation
+
+/// Receives each block span encountered during a `walk`, with its nesting depth.
+protocol BlockVisitor {
+    mutating func visit(_ span: BlockSpan, depth: Int)
+}
+
+extension BlockVisitor {
+    /// Pre-order walk: visits each span at `depth`, then its children at `depth + 1`.
+    mutating func walk(_ blocks: [BlockSpan], depth: Int = 0) {
+        for span in blocks {
+            visit(span, depth: depth)
+            // Walking an empty child list is a no-op, so leaves need no special case.
+            walk(span.children, depth: depth + 1)
+        }
+    }
+}
diff --git a/Tests/MarkdownEngineTests/BlockVisitorTests.swift b/Tests/MarkdownEngineTests/BlockVisitorTests.swift
new file mode 100644
index 0000000..7abdb17
--- /dev/null
+++ b/Tests/MarkdownEngineTests/BlockVisitorTests.swift
@@ -0,0 +1,47 @@
+import Testing
+import Foundation
+@testable import MarkdownEngine
+
+@Suite("BlockVisitor")
+struct BlockVisitorTests {
+
+    @Test func defaultWalkVisitsAllBlocksInOrder() {
+        let result = BlockScanner.scan("# A\n\nBody\n\n```\ncode\n```")
+        var visited: [BlockKind] = []
+        struct Recorder: BlockVisitor {
+            var collect: (BlockKind) -> Void
+            func visit(_ span: BlockSpan, depth: Int) {
+                collect(span.kind)
+            }
+        }
+        var v = Recorder(collect: { visited.append($0) })
+        v.walk(result.blocks)
+        #expect(visited.count == result.blocks.count)
+        // Top-level kinds must match block order.
+        for (i, b) in result.blocks.enumerated() {
+            #expect(visited[i] == b.kind)
+        }
+    }
+
+    @Test func walkRecursesIntoChildren() {
+        // Phase 1 spans never have children, but the default walk must already
+        // recurse so Phase 2 nested blocks work without changes.
+        let leaf = BlockSpan(kind: .paragraph,
+                             range: NSRange(location: 10, length: 5),
+                             contentRange: NSRange(location: 10, length: 5))
+        let container = BlockSpan(kind: .blockquote,
+                                  range: NSRange(location: 0, length: 20),
+                                  contentRange: NSRange(location: 2, length: 18),
+                                  children: [leaf])
+        var visited: [BlockKind] = []
+        struct Recorder: BlockVisitor {
+            var collect: (BlockKind) -> Void
+            func visit(_ span: BlockSpan, depth: Int) {
+                collect(span.kind)
+            }
+        }
+        var v = Recorder(collect: { visited.append($0) })
+        v.walk([container])
+        #expect(visited == [.blockquote, .paragraph])
+    }
+}

From ca60b87e5680d4957e8953ec4503f9d7ff24ee37 Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Thu, 14 May 2026 21:11:07 +0200
Subject: [PATCH 10/13] fix(parser): preserve extractLanguage + Setext heading
 level via marker semantics

---
 .../MarkdownEngine/Parser/BlockScanner.swift  | 10 +++++--
 .../ParseTokensGoldenTests.swift              | 26 +++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
index 471098f..5b11605 100644
--- a/Sources/MarkdownEngine/Parser/BlockScanner.swift
+++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
@@ -145,11 +145,17 @@ enum BlockScanner {
                                       length: NSMaxRange(last) - first.location)
             let fullRange = NSRange(location: bufferRange.location,
                                     length: NSMaxRange(underlineLineRange) - bufferRange.location)
+            // First marker encodes heading level via length (matches the ATX
+            // convention `markerRanges[0].length == hashCount`), so existing
+            // stylers that derive level from this length keep working for
+            // Setext. The full underline range is kept as a secondary marker.
+            let levelMarker = NSRange(location: underlineContentRange.location,
+                                      length: min(level, underlineContentRange.length))
             blocks.append(BlockSpan(
                 kind: .heading(level: level),
                 range: fullRange,
                 contentRange: bufferRange,
-                markerRanges: [underlineContentRange]
+                markerRanges: [levelMarker, underlineContentRange]
             ))
             paragraphBuffer.removeAll(keepingCapacity: true)
         }
@@ -384,7 +390,7 @@ enum BlockScanner {
             kind: .fencedCode(language: opener.language),
             range: blockRange,
             contentRange: codeContentRange,
-            markerRanges: [opener.fenceRange, closingFence]
+            markerRanges: [openerLineRange, closingFence]
         )
         state.blocks.append(block)
         return cursor
diff --git a/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift b/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
index 2af83c8..f1eec6e 100644
--- a/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
+++ b/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
@@ -183,4 +183,30 @@ struct ParseTokensGoldenTests {
         let tokens = MarkdownTokenizer.parseTokens(in: text)
         #expect(tokens.filter { $0.kind == .bold }.isEmpty)
     }
+
+    @Test func extractLanguageStillWorksForFencedCodeAfterRefactor() {
+        let text = """
+        ```swift
+        let x = 1
+        ```
+        """
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        guard let codeToken = tokens.first(where: { $0.kind == .codeBlock }) else {
+            Issue.record("Expected a codeBlock token"); return
+        }
+        #expect(MarkdownTokenizer.extractLanguage(from: codeToken, in: text) == "swift")
+    }
+
+    @Test func extractLanguageReturnsNilWhenNoLanguageTag() {
+        let text = """
+        ```
+        let x = 1
+        ```
+        """
+        let tokens = MarkdownTokenizer.parseTokens(in: text)
+        guard let codeToken = tokens.first(where: { $0.kind == .codeBlock }) else {
+            Issue.record("Expected a codeBlock token"); return
+        }
+        #expect(MarkdownTokenizer.extractLanguage(from: codeToken, in: text) == nil)
+    }
 }

From 1bcf396488f5c8a06ac7597012784b76dccaabb3 Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Mon, 18 May 2026 15:22:08 +0200
Subject: [PATCH 11/13] refactor(parser): drop Setext heading support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Setext underline lookahead (`Title\n====` / `Title\n----` rewriting
the buffered paragraph into a heading) absorbs all preceding paragraph
lines per CommonMark §4.3 — which is unintuitive for casual notes: a
user typing `---` as a visual separator would unexpectedly promote the
prior N lines into a single H2.

Bear, Apple Notes, and Notion don't support Setext for the same UX
reason. ATX (`# Title`) covers the same use case unambiguously.

Removed:
- `setextUnderlineLevel` (entire function)
- `rewriteBufferAsHeading` (entire function — only Setext used it)
- The Setext lookahead in `BlockScanner.scan`
- The `paragraphBuffer.isEmpty` gate on thematic-break detection
  (since Setext is gone, thematic breaks can now interrupt
  paragraphs per CommonMark §4.1 — buffer is flushed first)
- 4 Setext-specific tests (`setextH1WithEqualsUnderline`,
  `setextH2WithDashUnderline`, `setextSpansMultipleParagraphLines`,
  `dashUnderlineAfterParagraphPrefersSetext`)
- `dashesAloneWithoutParagraphAreNotConsumedAsHeading` (redundant
  with `thematicBreakWithDashes`)

Added:
- `dashUnderlineAfterParagraphInterruptsAsThematicBreak` — verifies
  that `Title\n---` is now paragraph + thematic break
- `equalsUnderlineAfterParagraphIsNotAHeading` — verifies `Title\n===`
  stays a single multi-line paragraph

64 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../MarkdownEngine/Parser/BlockScanner.swift  | 81 +++----------------
 .../BlockScannerTests.swift                   | 61 ++++----------
 2 files changed, 26 insertions(+), 116 deletions(-)

diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
index 5b11605..879d651 100644
--- a/Sources/MarkdownEngine/Parser/BlockScanner.swift
+++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
@@ -7,9 +7,13 @@
 //  map. The inline parser (MarkdownTokenizer.parseTokens) runs over the
 //  content substring of each inline-allowing block.
 //
-//  Paragraph emission is buffered so Setext heading lookahead can rewrite
-//  the buffered paragraph into a heading when the next line is an
-//  underline (===, ---).
+//  Paragraph emission is buffered so consecutive paragraph lines collapse
+//  into a single `.paragraph` block, and so interrupting constructs
+//  (thematic break, blank line) can flush the buffer cleanly.
+//
+//  Setext headings (`Title\n====` / `Title\n----`) are intentionally NOT
+//  supported — they're a CommonMark feature but Nodes prefers the ATX
+//  style (`# Title`) for unambiguous editing.
 //
 
 import Foundation
@@ -61,20 +65,9 @@ enum BlockScanner {
                 continue
             }
 
-            // Setext underline rewrites buffered paragraph into a heading.
-            if !state.paragraphBuffer.isEmpty,
-               let level = setextUnderlineLevel(contentRange: contentRange, in: nsText) {
-                state.rewriteBufferAsHeading(level: level,
-                                             underlineLineRange: lineRange,
-                                             underlineContentRange: contentRange)
-                lineStart = lineEnd
-                continue
-            }
-
-            // Thematic break (only when no paragraph is being buffered — otherwise
-            // a `---` line would have already been claimed by Setext above).
-            if state.paragraphBuffer.isEmpty,
-               isThematicBreak(contentRange: contentRange, in: nsText) {
+            // Thematic break — interrupts any buffered paragraph (CommonMark §4.1).
+            if isThematicBreak(contentRange: contentRange, in: nsText) {
+                state.flushBufferedParagraph()
                 state.blocks.append(BlockSpan(
                     kind: .thematicBreak,
                     range: lineRange,
@@ -117,7 +110,8 @@ enum BlockScanner {
         let nsText: NSString
         var blocks: [BlockSpan] = []
         var linkReferences: [String: LinkReference] = [:]
-        /// Buffered paragraph lines awaiting commit (Setext-heading lookahead).
+        /// Buffered paragraph lines awaiting commit (blank line or
+        /// interrupting block — thematic break — flushes them).
         var paragraphBuffer: [NSRange] = []
 
         mutating func appendParagraphLine(_ lineRange: NSRange) {
@@ -137,28 +131,6 @@ enum BlockScanner {
             paragraphBuffer.removeAll(keepingCapacity: true)
         }
 
-        mutating func rewriteBufferAsHeading(level: Int,
-                                             underlineLineRange: NSRange,
-                                             underlineContentRange: NSRange) {
-            guard let first = paragraphBuffer.first, let last = paragraphBuffer.last else { return }
-            let bufferRange = NSRange(location: first.location,
-                                      length: NSMaxRange(last) - first.location)
-            let fullRange = NSRange(location: bufferRange.location,
-                                    length: NSMaxRange(underlineLineRange) - bufferRange.location)
-            // First marker encodes heading level via length (matches the ATX
-            // convention `markerRanges[0].length == hashCount`), so existing
-            // stylers that derive level from this length keep working for
-            // Setext. The full underline range is kept as a secondary marker.
-            let levelMarker = NSRange(location: underlineContentRange.location,
-                                      length: min(level, underlineContentRange.length))
-            blocks.append(BlockSpan(
-                kind: .heading(level: level),
-                range: fullRange,
-                contentRange: bufferRange,
-                markerRanges: [levelMarker, underlineContentRange]
-            ))
-            paragraphBuffer.removeAll(keepingCapacity: true)
-        }
     }
 
     // MARK: - Line iteration
@@ -254,35 +226,6 @@ enum BlockScanner {
         )
     }
 
-    // MARK: Setext
-
-    /// Returns 1 for `===…`, 2 for `---…`, nil otherwise. CommonMark allows
-    /// up to 3 leading spaces and any trailing whitespace.
-    private static func setextUnderlineLevel(contentRange: NSRange, in nsText: NSString) -> Int? {
-        let lineEnd = NSMaxRange(contentRange)
-        var i = contentRange.location
-        var leading = 0
-        while i < lineEnd, nsText.character(at: i) == 0x20, leading < 4 {
-            i += 1; leading += 1
-        }
-        if leading >= 4 { return nil }
-        guard i < lineEnd else { return nil }
-        let ch = nsText.character(at: i)
-        guard ch == 0x3D /* = */ || ch == 0x2D /* - */ else { return nil }
-        var count = 0
-        while i < lineEnd, nsText.character(at: i) == ch {
-            i += 1; count += 1
-        }
-        guard count >= 1 else { return nil }
-        // Only trailing whitespace allowed.
-        while i < lineEnd {
-            let c = nsText.character(at: i)
-            if c != 0x20 && c != 0x09 { return nil }
-            i += 1
-        }
-        return ch == 0x3D ? 1 : 2
-    }
-
     // MARK: Fenced code
 
     private struct FencedCodeOpener {
diff --git a/Tests/MarkdownEngineTests/BlockScannerTests.swift b/Tests/MarkdownEngineTests/BlockScannerTests.swift
index 1089b06..dd14238 100644
--- a/Tests/MarkdownEngineTests/BlockScannerTests.swift
+++ b/Tests/MarkdownEngineTests/BlockScannerTests.swift
@@ -153,46 +153,6 @@ struct BlockScannerTests {
         #expect(result.blocks.allSatisfy { $0.kind == .paragraph })
     }
 
-    // MARK: Setext heading
-
-    @Test func setextH1WithEqualsUnderline() {
-        let text = "Title\n====="
-        let result = BlockScanner.scan(text)
-        #expect(result.blocks.count == 1)
-        if case .heading(let level) = result.blocks.first?.kind {
-            #expect(level == 1)
-        } else {
-            Issue.record("Expected heading kind")
-        }
-    }
-
-    @Test func setextH2WithDashUnderline() {
-        let text = "Title\n-----"
-        let result = BlockScanner.scan(text)
-        if case .heading(let level) = result.blocks.first?.kind {
-            #expect(level == 2)
-        } else {
-            Issue.record("Expected heading kind")
-        }
-    }
-
-    @Test func setextSpansMultipleParagraphLines() {
-        let text = "Line one\nLine two\n==="
-        let result = BlockScanner.scan(text)
-        #expect(result.blocks.count == 1)
-        if case .heading = result.blocks.first?.kind { /* ok */ } else { Issue.record("Expected heading") }
-    }
-
-    @Test func dashesAloneWithoutParagraphAreNotConsumedAsHeading() {
-        // Without a preceding paragraph, `---` does not become a heading via Setext.
-        // (Thematic-break recognition arrives in Task 6.)
-        let text = "\n---"
-        let result = BlockScanner.scan(text)
-        #expect(!result.blocks.contains(where: {
-            if case .heading = $0.kind { return true } else { return false }
-        }))
-    }
-
     // MARK: Thematic break
 
     @Test func thematicBreakWithDashes() {
@@ -217,14 +177,21 @@ struct BlockScannerTests {
         #expect(result.blocks[1].kind == .paragraph)
     }
 
-    @Test func dashUnderlineAfterParagraphPrefersSetext() {
-        // Setext H2 must win over thematic break when there's a buffered paragraph.
+    @Test func dashUnderlineAfterParagraphInterruptsAsThematicBreak() {
+        // Setext is intentionally disabled: `Title\n---` does NOT become a
+        // heading — it's a paragraph "Title" plus a thematic break.
         let result = BlockScanner.scan("Title\n---")
-        if case .heading(let lvl) = result.blocks.first?.kind {
-            #expect(lvl == 2)
-        } else {
-            Issue.record("Expected Setext H2")
-        }
+        #expect(result.blocks.count == 2)
+        #expect(result.blocks[0].kind == .paragraph)
+        #expect(result.blocks[1].kind == .thematicBreak)
+    }
+
+    @Test func equalsUnderlineAfterParagraphIsNotAHeading() {
+        // Setext is intentionally disabled: `Title\n===` is one paragraph
+        // spanning two lines, not an H1.
+        let result = BlockScanner.scan("Title\n===")
+        #expect(result.blocks.count == 1)
+        #expect(result.blocks[0].kind == .paragraph)
     }
 
     // MARK: Link reference definitions

From d1d8c02f7f06056da963daeb256790bcd0338241 Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Mon, 18 May 2026 15:24:51 +0200
Subject: [PATCH 12/13] fix(parser): include hash/content whitespace in ATX
 heading marker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pre-Phase-1 regex-based heading parser stored the leading space
between `#` and content as `markerRanges[1]`, so the marker-shrink pass
collapsed it together with the hashes when the heading wasn't active.
The new BlockScanner.atxHeading only emitted the hashes, leaving the
space at full width — visible as a small gap before the heading text
once the cursor moved away.

Recapture the whitespace range and append it as a secondary marker,
keeping the existing `markerRanges[0].length == level` invariant the
stylers rely on.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 Sources/MarkdownEngine/Parser/BlockScanner.swift | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
index 879d651..3c4a058 100644
--- a/Sources/MarkdownEngine/Parser/BlockScanner.swift
+++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
@@ -208,7 +208,6 @@ enum BlockScanner {
 
         // Skip spaces between hashes and content
         let hashEnd = i
-        _ = hashEnd
         while i < lineEnd {
             let c = nsText.character(at: i)
             if c == 0x20 || c == 0x09 { i += 1 } else { break }
@@ -218,11 +217,20 @@ enum BlockScanner {
         let cRange = NSRange(location: contentStart, length: max(0, contentEnd - contentStart))
         let hashRange = NSRange(location: hashStart, length: hashCount)
 
+        // markerRanges[0] is the hashes (length == level, relied on by stylers).
+        // markerRanges[1], when present, is the whitespace between hashes and
+        // content — included as a marker so it shrinks together with the
+        // hashes when the heading is inactive (no visible gap before text).
+        var markerRanges: [NSRange] = [hashRange]
+        if contentStart > hashEnd {
+            markerRanges.append(NSRange(location: hashEnd, length: contentStart - hashEnd))
+        }
+
         return BlockSpan(
             kind: .heading(level: hashCount),
             range: lineRange,
             contentRange: cRange,
-            markerRanges: [hashRange]
+            markerRanges: markerRanges
         )
     }
 

From 22baee8e035b0bac7c9f554a899fea778c7b01b7 Mon Sep 17 00:00:00 2001
From: luca-chen198 <chenluca726@gmail.com>
Date: Mon, 18 May 2026 15:55:49 +0200
Subject: [PATCH 13/13] test: remove Phase-1 unit tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drops BlockScannerTests, BlockSpanTests, BlockVisitorTests, and
ParseTokensGoldenTests. The pre-Phase-1 MarkdownEngineDecouplingTests
public-API contract suite stays — Phase 1 didn't add API surface.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../BlockScannerTests.swift                   | 222 ------------------
 .../MarkdownEngineTests/BlockSpanTests.swift  |  44 ----
 .../BlockVisitorTests.swift                   |  47 ----
 .../ParseTokensGoldenTests.swift              | 212 -----------------
 4 files changed, 525 deletions(-)
 delete mode 100644 Tests/MarkdownEngineTests/BlockScannerTests.swift
 delete mode 100644 Tests/MarkdownEngineTests/BlockSpanTests.swift
 delete mode 100644 Tests/MarkdownEngineTests/BlockVisitorTests.swift
 delete mode 100644 Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift

diff --git a/Tests/MarkdownEngineTests/BlockScannerTests.swift b/Tests/MarkdownEngineTests/BlockScannerTests.swift
deleted file mode 100644
index dd14238..0000000
--- a/Tests/MarkdownEngineTests/BlockScannerTests.swift
+++ /dev/null
@@ -1,222 +0,0 @@
-//
-//  BlockScannerTests.swift
-//  MarkdownEngineTests
-//
-
-import Testing
-import Foundation
-@testable import MarkdownEngine
-
-@Suite("BlockScanner")
-struct BlockScannerTests {
-
-    // MARK: Paragraph
-
-    @Test func singleParagraph() {
-        let result = BlockScanner.scan("Hello, world.")
-        #expect(result.blocks.count == 1)
-        if let first = result.blocks.first {
-            #expect(first.kind == .paragraph)
-            #expect(first.range == NSRange(location: 0, length: 13))
-        }
-    }
-
-    @Test func twoParagraphsSeparatedByBlankLine() {
-        let text = "First.\n\nSecond."
-        let result = BlockScanner.scan(text)
-        #expect(result.blocks.count == 2)
-        #expect(result.blocks.allSatisfy { $0.kind == .paragraph })
-    }
-
-    @Test func paragraphSpanningMultipleSoftLines() {
-        let text = "Line one\nLine two\nLine three"
-        let result = BlockScanner.scan(text)
-        #expect(result.blocks.count == 1)
-        #expect(result.blocks.first?.kind == .paragraph)
-    }
-
-    @Test func emptyInputProducesNoBlocks() {
-        let result = BlockScanner.scan("")
-        #expect(result.blocks.isEmpty)
-    }
-
-    @Test func whitespaceOnlyInputProducesNoBlocks() {
-        let result = BlockScanner.scan("\n   \n\n")
-        #expect(result.blocks.isEmpty)
-    }
-
-    // MARK: ATX headings
-
-    @Test func atxHeadingLevel1() {
-        let result = BlockScanner.scan("# Title")
-        #expect(result.blocks.count == 1)
-        if case .heading(let level) = result.blocks.first?.kind {
-            #expect(level == 1)
-        } else {
-            Issue.record("Expected heading kind")
-        }
-    }
-
-    @Test func atxHeadingLevel6() {
-        let result = BlockScanner.scan("###### Title")
-        if case .heading(let level) = result.blocks.first?.kind {
-            #expect(level == 6)
-        } else {
-            Issue.record("Expected heading kind")
-        }
-    }
-
-    @Test func atxHeadingSevenHashesIsParagraph() {
-        // CommonMark: more than 6 # is not a heading.
-        let result = BlockScanner.scan("####### NotHeading")
-        #expect(result.blocks.first?.kind == .paragraph)
-    }
-
-    @Test func atxHeadingWithoutSpaceIsParagraph() {
-        // CommonMark: `#title` (no space) is a paragraph.
-        let result = BlockScanner.scan("#NotHeading")
-        #expect(result.blocks.first?.kind == .paragraph)
-    }
-
-    @Test func atxHeadingContentRangeExcludesHashAndSpace() {
-        let result = BlockScanner.scan("## Title")
-        let heading = result.blocks.first
-        #expect(heading?.contentRange == NSRange(location: 3, length: 5))
-    }
-
-    @Test func atxHeadingMarkerRangeCoversHashes() {
-        let result = BlockScanner.scan("### Title")
-        let heading = result.blocks.first
-        #expect(heading?.markerRanges.first == NSRange(location: 0, length: 3))
-    }
-
-    @Test func atxHeadingFollowedByParagraph() {
-        let text = "# Heading\n\nParagraph body"
-        let result = BlockScanner.scan(text)
-        #expect(result.blocks.count == 2)
-        if case .heading = result.blocks[0].kind { /* ok */ } else { Issue.record("first should be heading") }
-        #expect(result.blocks[1].kind == .paragraph)
-    }
-
-    // MARK: Fenced code
-
-    @Test func fencedCodeBlockNoLanguage() {
-        let text = "```\nlet x = 1\n```"
-        let result = BlockScanner.scan(text)
-        #expect(result.blocks.count == 1)
-        if case .fencedCode(let lang) = result.blocks.first?.kind {
-            #expect(lang == nil)
-        } else {
-            Issue.record("Expected fencedCode kind")
-        }
-    }
-
-    @Test func fencedCodeBlockWithLanguage() {
-        let text = "```swift\nlet x = 1\n```"
-        let result = BlockScanner.scan(text)
-        if case .fencedCode(let lang) = result.blocks.first?.kind {
-            #expect(lang == "swift")
-        } else {
-            Issue.record("Expected fencedCode kind")
-        }
-    }
-
-    @Test func fencedCodeContentRangeCoversOnlyBody() {
-        let text = "```\nbody\n```"
-        let result = BlockScanner.scan(text)
-        let block = result.blocks.first!
-        let body = (text as NSString).substring(with: block.contentRange)
-        #expect(body == "body\n")
-    }
-
-    @Test func fencedCodeBlockMarkerRangesCoverBothFences() {
-        let text = "```\nbody\n```"
-        let result = BlockScanner.scan(text)
-        #expect(result.blocks.first?.markerRanges.count == 2)
-    }
-
-    @Test func emphasisLikeContentInsideFencedCodeIsIgnoredByBlockKind() {
-        // Block scanner is responsible for marking content as "not inline" —
-        // the pipeline filter is exercised in the integration tests.
-        let text = "```\n**not bold**\n```"
-        let result = BlockScanner.scan(text)
-        let block = result.blocks.first!
-        #expect(!block.kind.allowsInlineContent)
-    }
-
-    @Test func unclosedFencedCodeBlockFallsBackToParagraph() {
-        // No closing fence => current parseTokens treats it as plain text.
-        // Block scanner falls back to a single paragraph spanning the opening
-        // fence through the rest of the input.
-        let text = "```swift\nlet x = 1"
-        let result = BlockScanner.scan(text)
-        #expect(result.blocks.allSatisfy { $0.kind == .paragraph })
-    }
-
-    // MARK: Thematic break
-
-    @Test func thematicBreakWithDashes() {
-        let result = BlockScanner.scan("---")
-        #expect(result.blocks.first?.kind == .thematicBreak)
-    }
-
-    @Test func thematicBreakWithAsterisks() {
-        let result = BlockScanner.scan("***")
-        #expect(result.blocks.first?.kind == .thematicBreak)
-    }
-
-    @Test func thematicBreakWithUnderscores() {
-        let result = BlockScanner.scan("___")
-        #expect(result.blocks.first?.kind == .thematicBreak)
-    }
-
-    @Test func thematicBreakDoesNotConsumeFollowingParagraph() {
-        let result = BlockScanner.scan("---\n\nbody")
-        #expect(result.blocks.count == 2)
-        #expect(result.blocks[0].kind == .thematicBreak)
-        #expect(result.blocks[1].kind == .paragraph)
-    }
-
-    @Test func dashUnderlineAfterParagraphInterruptsAsThematicBreak() {
-        // Setext is intentionally disabled: `Title\n---` does NOT become a
-        // heading — it's a paragraph "Title" plus a thematic break.
-        let result = BlockScanner.scan("Title\n---")
-        #expect(result.blocks.count == 2)
-        #expect(result.blocks[0].kind == .paragraph)
-        #expect(result.blocks[1].kind == .thematicBreak)
-    }
-
-    @Test func equalsUnderlineAfterParagraphIsNotAHeading() {
-        // Setext is intentionally disabled: `Title\n===` is one paragraph
-        // spanning two lines, not an H1.
-        let result = BlockScanner.scan("Title\n===")
-        #expect(result.blocks.count == 1)
-        #expect(result.blocks[0].kind == .paragraph)
-    }
-
-    // MARK: Link reference definitions
-
-    @Test func linkReferenceDefinitionBasic() {
-        let text = "[foo]: https://example.com"
-        let result = BlockScanner.scan(text)
-        #expect(result.linkReferences["foo"]?.url == "https://example.com")
-    }
-
-    @Test func linkReferenceDefinitionWithTitle() {
-        let text = "[foo]: https://example.com \"Example\""
-        let result = BlockScanner.scan(text)
-        #expect(result.linkReferences["foo"]?.title == "Example")
-    }
-
-    @Test func linkReferenceDefinitionCaseInsensitiveLabel() {
-        let text = "[FOO Bar]: https://example.com"
-        let result = BlockScanner.scan(text)
-        #expect(result.linkReferences["foo bar"] != nil)
-    }
-
-    @Test func duplicateLinkReferenceFirstWins() {
-        let text = "[foo]: https://first.com\n[foo]: https://second.com"
-        let result = BlockScanner.scan(text)
-        #expect(result.linkReferences["foo"]?.url == "https://first.com")
-    }
-}
diff --git a/Tests/MarkdownEngineTests/BlockSpanTests.swift b/Tests/MarkdownEngineTests/BlockSpanTests.swift
deleted file mode 100644
index 59ff441..0000000
--- a/Tests/MarkdownEngineTests/BlockSpanTests.swift
+++ /dev/null
@@ -1,44 +0,0 @@
-//
-//  BlockSpanTests.swift
-//  MarkdownEngineTests
-//
-
-import Testing
-import Foundation
-@testable import MarkdownEngine
-
-@Suite("BlockSpan data model")
-struct BlockSpanTests {
-
-    @Test func leafBlockHasEmptyChildrenByDefault() {
-        let span = BlockSpan(
-            kind: .paragraph,
-            range: NSRange(location: 0, length: 5),
-            contentRange: NSRange(location: 0, length: 5),
-            markerRanges: []
-        )
-        #expect(span.children.isEmpty)
-    }
-
-    @Test func headingKindCarriesLevel() {
-        let kind: BlockKind = .heading(level: 2)
-        if case .heading(let level) = kind {
-            #expect(level == 2)
-        } else {
-            Issue.record("Expected heading kind")
-        }
-    }
-
-    @Test func linkReferenceHoldsLabelUrlAndTitle() {
-        let ref = LinkReference(label: "foo", url: "https://example.com", title: "Example")
-        #expect(ref.label == "foo")
-        #expect(ref.url == "https://example.com")
-        #expect(ref.title == "Example")
-    }
-
-    @Test func linkReferenceLabelLowercasedKeyMatchesSpec() {
-        // CommonMark folds label case for matching; we normalize at construction.
-        let ref = LinkReference(label: "  Foo  Bar  ", url: "x")
-        #expect(ref.normalizedLabel == "foo bar")
-    }
-}
diff --git a/Tests/MarkdownEngineTests/BlockVisitorTests.swift b/Tests/MarkdownEngineTests/BlockVisitorTests.swift
deleted file mode 100644
index 7abdb17..0000000
--- a/Tests/MarkdownEngineTests/BlockVisitorTests.swift
+++ /dev/null
@@ -1,47 +0,0 @@
-import Testing
-import Foundation
-@testable import MarkdownEngine
-
-@Suite("BlockVisitor")
-struct BlockVisitorTests {
-
-    @Test func defaultWalkVisitsAllBlocksInOrder() {
-        let result = BlockScanner.scan("# A\n\nBody\n\n```\ncode\n```")
-        var visited: [BlockKind] = []
-        struct Recorder: BlockVisitor {
-            var collect: (BlockKind) -> Void
-            func visit(_ span: BlockSpan, depth: Int) {
-                collect(span.kind)
-            }
-        }
-        var v = Recorder(collect: { visited.append($0) })
-        v.walk(result.blocks)
-        #expect(visited.count == result.blocks.count)
-        // Top-level kinds must match block order.
-        for (i, b) in result.blocks.enumerated() {
-            #expect(visited[i] == b.kind)
-        }
-    }
-
-    @Test func walkRecursesIntoChildren() {
-        // Phase 1 spans never have children, but the default walk must already
-        // recurse so Phase 2 nested blocks work without changes.
-        let leaf = BlockSpan(kind: .paragraph,
-                             range: NSRange(location: 10, length: 5),
-                             contentRange: NSRange(location: 10, length: 5))
-        let container = BlockSpan(kind: .blockquote,
-                                  range: NSRange(location: 0, length: 20),
-                                  contentRange: NSRange(location: 2, length: 18),
-                                  children: [leaf])
-        var visited: [BlockKind] = []
-        struct Recorder: BlockVisitor {
-            var collect: (BlockKind) -> Void
-            func visit(_ span: BlockSpan, depth: Int) {
-                collect(span.kind)
-            }
-        }
-        var v = Recorder(collect: { visited.append($0) })
-        v.walk([container])
-        #expect(visited == [.blockquote, .paragraph])
-    }
-}
diff --git a/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift b/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
deleted file mode 100644
index f1eec6e..0000000
--- a/Tests/MarkdownEngineTests/ParseTokensGoldenTests.swift
+++ /dev/null
@@ -1,212 +0,0 @@
-//
-//  ParseTokensGoldenTests.swift
-//  MarkdownEngineTests
-//
-//  Locks the current public behavior of MarkdownTokenizer.parseTokens.
-//  Refactors must keep these green; new features add new fixtures.
-//
-//  Block-precedence tests (no emphasis / wiki-link inside fenced code) live in
-//  the Phase-1 integration suite (ParseTokensBlockPhaseIntegrationTests), not
-//  here — those assertions describe the post-refactor behavior; the baseline
-//  snapshot must lock what the current regex parser actually emits.
-//
-
-import Testing
-import Foundation
-@testable import MarkdownEngine
-
-@Suite("parseTokens golden output")
-struct ParseTokensGoldenTests {
-
-    // MARK: Headings
-
-    @Test func atxHeadingsAllSixLevels() {
-        let text = """
-        # H1
-        ## H2
-        ### H3
-        #### H4
-        ##### H5
-        ###### H6
-        """
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let headings = tokens.filter { $0.kind == .heading }
-        #expect(headings.count == 6)
-    }
-
-    @Test func headingFollowedByParagraphHasNoOverlap() {
-        let text = "# Title\n\nBody text\n"
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let headings = tokens.filter { $0.kind == .heading }
-        #expect(headings.count == 1)
-        let heading = headings[0]
-        #expect(NSMaxRange(heading.range) <= 7) // "# Title".count
-    }
-
-    // MARK: Fenced code blocks
-
-    @Test func fencedCodeBlockWithLanguageProducesCodeBlockToken() {
-        let text = """
-        ```swift
-        let x = 42
-        ```
-        """
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let code = tokens.filter { $0.kind == .codeBlock }
-        #expect(code.count == 1)
-    }
-
-    // MARK: Inline (within paragraphs)
-
-    @Test func boldEmphasisInParagraph() {
-        let text = "This is **bold** text."
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let bold = tokens.filter { $0.kind == .bold }
-        #expect(bold.count == 1)
-    }
-
-    @Test func italicEmphasisInParagraph() {
-        let text = "This is *italic* text."
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let italic = tokens.filter { $0.kind == .italic }
-        #expect(italic.count == 1)
-    }
-
-    @Test func wikiLinkInParagraph() {
-        let text = "See [[Other Note]] for more."
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let wiki = tokens.filter { $0.kind == .wikiLink }
-        #expect(wiki.count == 1)
-    }
-
-    @Test func imageEmbedInParagraph() {
-        let text = "Look ![[picture.png]] here."
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let img = tokens.filter { $0.kind == .imageEmbed }
-        #expect(img.count == 1)
-    }
-
-    @Test func inlineCodeInParagraph() {
-        let text = "Call `foo()` to do it."
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let code = tokens.filter { $0.kind == .inlineCode }
-        #expect(code.count == 1)
-    }
-
-    @Test func markdownLinkInParagraph() {
-        let text = "Visit [Apple](https://apple.com) today."
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let link = tokens.filter { $0.kind == .link }
-        #expect(link.count == 1)
-    }
-
-    // MARK: Mixed
-
-    @Test func mixedContentPreservesAllTokenKinds() {
-        let text = """
-        # Heading with **bold**
-
-        Paragraph with *italic*, `code`, and [[wiki]].
-
-        ```swift
-        let x = 1
-        ```
-
-        Trailing paragraph.
-        """
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        #expect(tokens.contains { $0.kind == .heading })
-        #expect(tokens.contains { $0.kind == .bold })
-        #expect(tokens.contains { $0.kind == .italic })
-        #expect(tokens.contains { $0.kind == .inlineCode })
-        #expect(tokens.contains { $0.kind == .wikiLink })
-        #expect(tokens.contains { $0.kind == .codeBlock })
-    }
-
-    // MARK: Edge cases
-
-    @Test func emptyDocumentReturnsNoTokens() {
-        let tokens = MarkdownTokenizer.parseTokens(in: "")
-        #expect(tokens.isEmpty)
-    }
-
-    @Test func whitespaceOnlyDocumentReturnsNoTokens() {
-        let tokens = MarkdownTokenizer.parseTokens(in: "\n\n   \n")
-        #expect(tokens.isEmpty)
-    }
-
-    @Test func unclosedFencedCodeIsNotTokenizedAsCodeBlock() {
-        // Current behavior: the codeBlockRegex requires a closing fence.
-        let text = """
-        ```swift
-        let x = 1
-        """
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        #expect(tokens.filter { $0.kind == .codeBlock }.isEmpty)
-    }
-
-    // MARK: Phase-1 integration regressions
-
-    @Test func parseTokensInternallyUsesBlockScanner() {
-        // After Phase 1, parseTokens still returns flat MarkdownToken array
-        // but produces .heading / .codeBlock tokens via the block scanner.
-        let text = "# Title\n\n```swift\nlet x = 1\n```\n\nBody **bold**."
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        #expect(tokens.contains { $0.kind == .heading })
-        #expect(tokens.contains { $0.kind == .codeBlock })
-        #expect(tokens.contains { $0.kind == .bold })
-    }
-
-    @Test func wikiLinkInsideFencedCodeIsNotEmittedAfterRefactor() {
-        let text = "```\n[[NotALink]]\n```"
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let wiki = tokens.filter { $0.kind == .wikiLink }
-        #expect(wiki.isEmpty)
-    }
-
-    @Test func imageEmbedInsideFencedCodeIsNotEmittedAfterRefactor() {
-        let text = "```\n![[picture.png]]\n```"
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let img = tokens.filter { $0.kind == .imageEmbed }
-        #expect(img.isEmpty)
-    }
-
-    @Test func inlineCodeInsideFencedCodeIsNotEmittedAfterRefactor() {
-        let text = "```\nlet a = `b`\n```"
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        let inlineCode = tokens.filter { $0.kind == .inlineCode }
-        #expect(inlineCode.isEmpty)
-    }
-
-    @Test func emphasisInsideFencedCodeIsNotEmittedAfterRefactor() {
-        let text = "```\n**bold-looking**\n```"
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        #expect(tokens.filter { $0.kind == .bold }.isEmpty)
-    }
-
-    @Test func extractLanguageStillWorksForFencedCodeAfterRefactor() {
-        let text = """
-        ```swift
-        let x = 1
-        ```
-        """
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        guard let codeToken = tokens.first(where: { $0.kind == .codeBlock }) else {
-            Issue.record("Expected a codeBlock token"); return
-        }
-        #expect(MarkdownTokenizer.extractLanguage(from: codeToken, in: text) == "swift")
-    }
-
-    @Test func extractLanguageReturnsNilWhenNoLanguageTag() {
-        let text = """
-        ```
-        let x = 1
-        ```
-        """
-        let tokens = MarkdownTokenizer.parseTokens(in: text)
-        guard let codeToken = tokens.first(where: { $0.kind == .codeBlock }) else {
-            Issue.record("Expected a codeBlock token"); return
-        }
-        #expect(MarkdownTokenizer.extractLanguage(from: codeToken, in: text) == nil)
-    }
-}