// diff --git a/Sources/MarkdownEngine/Parser/BlockScanner.swift b/Sources/MarkdownEngine/Parser/BlockScanner.swift
// new file mode 100644
// index 0000000..3c4a058
// --- /dev/null
// +++ b/Sources/MarkdownEngine/Parser/BlockScanner.swift
// @@ -0,0 +1,442 @@
//
//  BlockScanner.swift
//  MarkdownEngine
//
//  Phase-1 block-level Markdown parser. Walks the source line-by-line,
//  classifies each line, and emits `[BlockSpan]` plus a link-reference
//  map. The inline parser (MarkdownTokenizer.parseTokens) runs over the
//  content substring of each inline-allowing block.
//
//  Paragraph emission is buffered so consecutive paragraph lines collapse
//  into a single `.paragraph` block, and so interrupting constructs
//  (blank line, fenced code opener, ATX heading, thematic break) can flush
//  the buffer cleanly.
//
//  Setext headings (`Title\n====` / `Title\n----`) are intentionally NOT
//  supported — they're a CommonMark feature but Nodes prefers the ATX
//  style (`# Title`) for unambiguous editing.
//

import Foundation

enum BlockScanner {

    /// Scans `text` one line at a time and returns its top-level blocks
    /// together with the link reference definitions found along the way.
    ///
    /// Each line is tested against the block openers in a fixed order — blank
    /// line, fenced code, ATX heading, thematic break, link reference
    /// definition — and the first match wins. A line that matches none of
    /// them is buffered as paragraph text.
    ///
    /// - Parameter text: Markdown source. Every range in the result is an
    ///   `NSRange` into `text as NSString`.
    /// - Returns: The blocks in source order plus the reference map, keyed by
    ///   `LinkReference.normalizedLabel`. Empty input yields an empty result.
    static func scan(_ text: String) -> BlockScanResult {
        let source = text as NSString
        let sourceLength = source.length
        if sourceLength == 0 {
            return BlockScanResult(blocks: [], linkReferences: [:])
        }

        var state = ScannerState(nsText: source)
        var cursor = 0

        while cursor < sourceLength {
            let lineEnd = nextLineEnd(in: source, from: cursor, length: sourceLength)
            let wholeLine = NSRange(location: cursor, length: lineEnd - cursor)
            let lineBody = trimTrailingNewline(wholeLine, in: source)

            // Every outcome except a successfully closed fence resumes at the
            // start of the next line.
            var resumeAt = lineEnd

            classify: do {
                // 1) Blank line ends paragraph buffering and emits nothing.
                if isBlankLine(lineBody, in: source) {
                    state.flushBufferedParagraph()
                    break classify
                }

                // 2) Fenced code block (multi-line — consumes until closing fence).
                //    The pending paragraph is flushed as soon as an opener is
                //    seen, i.e. before it is known whether the fence closes.
                if let opener = fencedCodeOpener(contentRange: lineBody, in: source) {
                    state.flushBufferedParagraph()
                    if let pastFence = consumeFencedCode(
                        opener: opener,
                        openerLineRange: wholeLine,
                        nsText: source,
                        length: sourceLength,
                        state: &state
                    ) {
                        resumeAt = pastFence
                        break classify
                    }
                    // Unclosed fence: keep classifying this line; it normally
                    // ends up as paragraph text below.
                }

                // 3) ATX heading (single line).
                if let heading = atxHeading(lineRange: wholeLine, contentRange: lineBody, in: source) {
                    state.flushBufferedParagraph()
                    state.blocks.append(heading)
                    break classify
                }

                // 4) Thematic break — interrupts any buffered paragraph (CommonMark §4.1).
                if isThematicBreak(contentRange: lineBody, in: source) {
                    state.flushBufferedParagraph()
                    let rule = BlockSpan(
                        kind: .thematicBreak,
                        range: wholeLine,
                        contentRange: wholeLine,
                        markerRanges: [lineBody]
                    )
                    state.blocks.append(rule)
                    break classify
                }

                // 5) Link reference definition. Only considered while no
                //    paragraph is open, so it never interrupts paragraph text.
                if state.paragraphBuffer.isEmpty,
                   let def = linkReferenceDefinition(contentRange: lineBody, in: source) {
                    // The first definition of a label wins; later duplicates
                    // still produce a block but leave the map untouched.
                    let key = def.reference.normalizedLabel
                    if state.linkReferences[key] == nil {
                        state.linkReferences[key] = def.reference
                    }
                    let definition = BlockSpan(
                        kind: .linkReferenceDefinition(label: def.reference.label),
                        range: wholeLine,
                        contentRange: def.urlRange,
                        markerRanges: [def.labelRange]
                    )
                    state.blocks.append(definition)
                    break classify
                }

                // 6) Default: buffer as paragraph line.
                state.appendParagraphLine(wholeLine)
            }

            cursor = resumeAt
        }

        // A paragraph still open at end of input is emitted here.
        state.flushBufferedParagraph()
        return BlockScanResult(blocks: state.blocks, linkReferences: state.linkReferences)
    }

    // MARK: - Internal state

    /// Mutable accumulator threaded through one `scan` call.
    private struct ScannerState {
        let nsText: NSString
        /// Blocks emitted so far, in source order.
        var blocks: [BlockSpan] = []
        /// Collected definitions keyed by normalized label.
        var linkReferences: [String: LinkReference] = [:]
        /// Full line ranges of the paragraph currently being assembled. They
        /// are committed as one `.paragraph` block by `flushBufferedParagraph()`.
        var paragraphBuffer: [NSRange] = []

        /// Adds one more line to the paragraph being assembled.
        mutating func appendParagraphLine(_ lineRange: NSRange) {
            paragraphBuffer.append(lineRange)
        }

        /// Emits the buffered lines as a single `.paragraph` block spanning
        /// from the first line's start to the last line's end, then empties
        /// the buffer. Does nothing when no line is buffered.
        mutating func flushBufferedParagraph() {
            guard !paragraphBuffer.isEmpty else { return }
            let start = paragraphBuffer[0].location
            let end = NSMaxRange(paragraphBuffer[paragraphBuffer.count - 1])
            let span = NSRange(location: start, length: end - start)
            blocks.append(BlockSpan(
                kind: .paragraph,
                range: span,
                contentRange: span,
                markerRanges: []
            ))
            paragraphBuffer.removeAll(keepingCapacity: true)
        }

    }

    // MARK: - Line iteration

    /// End of the line that starts at `start`, including the trailing newline.
private static func nextLineEnd(in nsText: NSString, from start: Int, length: Int) -> Int {
    // LF, a lone CR, and CRLF each terminate a line; the terminator belongs
    // to the line it ends. Without any terminator the line runs to `length`.
    var i = start
    while i < length {
        let c = nsText.character(at: i)
        if c == 0x0A { // LF
            return i + 1
        }
        if c == 0x0D { // CR (maybe CRLF)
            if i + 1 < length, nsText.character(at: i + 1) == 0x0A {
                return i + 2
            }
            return i + 1
        }
        i += 1
    }
    return length
}

// MARK: - Classification helpers

/// Returns `range` without one trailing line terminator (CRLF, LF or CR).
/// A range that does not end in a terminator is returned unchanged.
private static func trimTrailingNewline(_ range: NSRange, in nsText: NSString) -> NSRange {
    var length = range.length
    let end = range.location + range.length
    if length >= 2,
       nsText.character(at: end - 2) == 0x0D,
       nsText.character(at: end - 1) == 0x0A {
        length -= 2
    } else if length >= 1 {
        let last = nsText.character(at: end - 1)
        if last == 0x0A || last == 0x0D { length -= 1 }
    }
    return NSRange(location: range.location, length: length)
}

/// `true` when `range` is empty or holds nothing but spaces and tabs.
///
/// NOTE(review): everything after `range.location..` was missing from the
/// text under review. The loop body below is a reconstruction — it treats
/// space (0x20) and tab (0x09) as blank, matching how the other helpers in
/// this file classify whitespace. Confirm against the original.
private static func isBlankLine(_ range: NSRange, in nsText: NSString) -> Bool {
    for i in range.location..<NSMaxRange(range) {
        let c = nsText.character(at: i)
        if c != 0x20 && c != 0x09 { return false }
    }
    return true
}

// MARK: ATX heading

/// Recognizes an ATX heading (`# Title` … `###### Title`) on a single line.
///
/// NOTE(review): this signature was missing from the text under review and
/// is reconstructed from the call in `scan`
/// (`atxHeading(lineRange:contentRange:in:)`, result appended to `blocks`).
///
/// - Parameters:
///   - lineRange: The whole line, including its terminator; becomes the
///     span's `range`.
///   - contentRange: The same line without its terminator; this is what
///     gets parsed.
///   - nsText: The text both ranges index into.
/// - Returns: A `.heading` span, or `nil` when the line is not a heading:
///   4+ leading spaces, no `#`, more than six `#`, or a `#` run that is
///   followed by something other than space, tab or end of line.
private static func atxHeading(
    lineRange: NSRange,
    contentRange: NSRange,
    in nsText: NSString
) -> BlockSpan? {
    // Up to 3 leading spaces allowed before #
    var i = contentRange.location
    let lineEnd = NSMaxRange(contentRange)
    var leadingSpaces = 0
    while i < lineEnd && leadingSpaces < 4 && nsText.character(at: i) == 0x20 {
        i += 1
        leadingSpaces += 1
    }
    if leadingSpaces >= 4 { return nil }

    // Count hashes (1...6). The loop reads at most seven so that a run of
    // seven or more is detected and rejected by the guard below.
    let hashStart = i
    var hashCount = 0
    while i < lineEnd && hashCount < 7 && nsText.character(at: i) == 0x23 { // #
        i += 1
        hashCount += 1
    }
    guard hashCount >= 1, hashCount <= 6 else { return nil }

    // Must be followed by space/tab or end of line
    if i < lineEnd {
        let next = nsText.character(at: i)
        guard next == 0x20 || next == 0x09 else { return nil }
    }

    // Skip spaces between hashes and content
    let hashEnd = i
    while i < lineEnd {
        let c = nsText.character(at: i)
        if c == 0x20 || c == 0x09 { i += 1 } else { break }
    }
    let contentStart = i
    let contentEnd = lineEnd
    // The content runs to the end of the line as-is: trailing whitespace
    // and any closing `#` run are not stripped.
    let cRange = NSRange(location: contentStart, length: max(0, contentEnd - contentStart))
    let hashRange = NSRange(location: hashStart, length: hashCount)

    // markerRanges[0] is the hashes (length == level, relied on by stylers).
    // markerRanges[1], when present, is the whitespace between hashes and
    // content — included as a marker so it shrinks together with the
    // hashes when the heading is inactive (no visible gap before text).
    var markerRanges: [NSRange] = [hashRange]
    if contentStart > hashEnd {
        markerRanges.append(NSRange(location: hashEnd, length: contentStart - hashEnd))
    }

    return BlockSpan(
        kind: .heading(level: hashCount),
        range: lineRange,
        contentRange: cRange,
        markerRanges: markerRanges
    )
}

// MARK: Fenced code

/// What was recognized on a fence opener line.
private struct FencedCodeOpener {
    /// The run of fence characters only (no indentation, no info string).
    let fenceRange: NSRange
    let fenceChar: UInt16 // ` or ~
    /// Text after the fence with surrounding spaces/tabs removed, or `nil`
    /// when nothing follows the fence.
    let language: String?
}

/// Detects a fenced code block opener on `contentRange`. CommonMark allows
/// up to 3 leading spaces and a fence of 3+ backticks or 3+ tildes.
private static func fencedCodeOpener(contentRange: NSRange, in nsText: NSString) -> FencedCodeOpener? {
    let lineEnd = NSMaxRange(contentRange)
    guard var i = indentedStart(of: contentRange, in: nsText), i < lineEnd else { return nil }

    let fenceChar = nsText.character(at: i)
    guard fenceChar == 0x60 /* ` */ || fenceChar == 0x7E /* ~ */ else { return nil }

    let fenceStart = i
    var count = 0
    while i < lineEnd, nsText.character(at: i) == fenceChar {
        i += 1; count += 1
    }
    guard count >= 3 else { return nil }

    // Backtick fences disallow ` anywhere on the opener line after the fence.
    if fenceChar == 0x60 {
        var j = i
        while j < lineEnd {
            if nsText.character(at: j) == 0x60 { return nil }
            j += 1
        }
    }

    // Language tag: rest of the line after fence, trimmed of whitespace.
    var langStart = i
    while langStart < lineEnd,
          (nsText.character(at: langStart) == 0x20 || nsText.character(at: langStart) == 0x09) {
        langStart += 1
    }
    var langEnd = lineEnd
    while langEnd > langStart,
          (nsText.character(at: langEnd - 1) == 0x20 || nsText.character(at: langEnd - 1) == 0x09) {
        langEnd -= 1
    }
    let language: String?
    if langStart < langEnd {
        language = nsText.substring(with: NSRange(location: langStart, length: langEnd - langStart))
    } else {
        language = nil
    }

    return FencedCodeOpener(
        fenceRange: NSRange(location: fenceStart, length: count),
        fenceChar: fenceChar,
        language: language
    )
}

/// Skips the optional indentation at the start of a line.
///
/// - Returns: The index of the first character after at most three leading
///   spaces, or `nil` when the line starts with four or more spaces (such a
///   line cannot open or close a fence, nor be a thematic break). The index
///   may equal the end of `contentRange` when the line holds only spaces.
private static func indentedStart(of contentRange: NSRange, in nsText: NSString) -> Int? {
    let lineEnd = NSMaxRange(contentRange)
    var i = contentRange.location
    var leading = 0
    while i < lineEnd, nsText.character(at: i) == 0x20, leading < 4 {
        i += 1; leading += 1
    }
    return leading >= 4 ? nil : i
}

/// Consumes the lines after `openerLineRange` up to and including the
/// matching closing fence (same character, at least as many of them) and
/// appends one `.fencedCode` block to `state.blocks`.
///
/// The emitted block has:
/// - `range`: opener line through closing-fence line, terminators included;
/// - `contentRange`: everything between the two fence lines, i.e. from the
///   end of the opener line to the start of the closing-fence line, so the
///   indentation in front of an indented closing fence is not part of the
///   code;
/// - `markerRanges`: the whole opener line, then the closing fence characters.
///
/// - Returns: The index just past the closing-fence line, or `nil` when the
///   end of the text is reached without a closing fence. In that case
///   nothing is appended to `state`.
private static func consumeFencedCode(
    opener: FencedCodeOpener,
    openerLineRange: NSRange,
    nsText: NSString,
    length: Int,
    state: inout ScannerState
) -> Int? {
    let contentStart = NSMaxRange(openerLineRange)
    var cursor = contentStart

    while cursor < length {
        let lineEnd = nextLineEnd(in: nsText, from: cursor, length: length)
        let contentRange = trimTrailingNewline(
            NSRange(location: cursor, length: lineEnd - cursor),
            in: nsText
        )

        if let closingFence = closingFenceRange(contentRange: contentRange,
                                                opener: opener,
                                                in: nsText) {
            // `cursor` is still the start of the closing-fence line here,
            // which is exactly where the code body ends.
            state.blocks.append(BlockSpan(
                kind: .fencedCode(language: opener.language),
                range: NSRange(location: openerLineRange.location,
                               length: lineEnd - openerLineRange.location),
                contentRange: NSRange(location: contentStart, length: cursor - contentStart),
                markerRanges: [openerLineRange, closingFence]
            ))
            return lineEnd
        }

        cursor = lineEnd
    }

    return nil // unclosed
}

/// If `contentRange` is a closing fence for `opener`, returns the range of
/// the fence characters themselves (not including leading/trailing whitespace).
/// Otherwise returns nil.
private static func closingFenceRange(contentRange: NSRange, opener: FencedCodeOpener, in nsText: NSString) -> NSRange? {
    let lineEnd = NSMaxRange(contentRange)
    guard var i = indentedStart(of: contentRange, in: nsText) else { return nil }

    let fenceStart = i
    var count = 0
    while i < lineEnd, nsText.character(at: i) == opener.fenceChar {
        i += 1; count += 1
    }
    guard count >= opener.fenceRange.length else { return nil }

    // Only whitespace allowed after the closing fence.
    while i < lineEnd {
        let c = nsText.character(at: i)
        if c != 0x20 && c != 0x09 { return nil }
        i += 1
    }
    return NSRange(location: fenceStart, length: count)
}

// MARK: Thematic break

/// `true` when the line consists of three or more of one marker character
/// (`-`, `_` or `*`), optionally separated by spaces or tabs, after at most
/// three leading spaces.
private static func isThematicBreak(contentRange: NSRange, in nsText: NSString) -> Bool {
    let lineEnd = NSMaxRange(contentRange)
    guard var i = indentedStart(of: contentRange, in: nsText), i < lineEnd else { return false }

    let marker = nsText.character(at: i)
    guard marker == 0x2D /* - */ || marker == 0x5F /* _ */ || marker == 0x2A /* * */ else { return false }

    var count = 0
    while i < lineEnd {
        let c = nsText.character(at: i)
        if c == marker { count += 1; i += 1; continue }
        if c == 0x20 || c == 0x09 { i += 1; continue }
        return false // any other character disqualifies the line
    }
    return count >= 3
}

// MARK: Link reference definitions

private struct LinkRefDefHit {
    let reference: LinkReference
    let labelRange: NSRange // includes the surrounding `[…]:`
    let urlRange: NSRange
}

private static let linkRefDefRegex: NSRegularExpression = {
    // ^ \s{0,3} \[ label \] : \s* url \s* ( "title" | 'title' | (title) )? \s* $
    let pattern = #"^[ ]{0,3}\[([^\[\]\r\n]+)\]:[ \t]*([^\s]+)(?:[ \t]+(?:"([^"\r\n]*)"|'([^'\r\n]*)'|\(([^)\r\n]*)\)))?[ \t]*$"#
    // The pattern is a compile-time constant, so a failure here is a
    // programming error and is allowed to trap.
    return try! NSRegularExpression(pattern: pattern, options: [])
}()

/// Parses a single-line `[label]: url "title"` definition.
///
/// - Returns: The parsed reference plus the ranges of the `[label]:` marker
///   and of the URL, or `nil` when the pattern does not match the whole of
///   `contentRange`.
private static func linkReferenceDefinition(contentRange: NSRange, in nsText: NSString) -> LinkRefDefHit? {
    let match = linkRefDefRegex.firstMatch(
        in: nsText as String,
        options: [],
        range: contentRange
    )
    guard let m = match, m.range == contentRange else { return nil }

    let labelTextRange = m.range(at: 1)
    let urlRange = m.range(at: 2)
    guard labelTextRange.location != NSNotFound, urlRange.location != NSNotFound else { return nil }
    let label = nsText.substring(with: labelTextRange)
    let url = nsText.substring(with: urlRange)

    // Groups 3, 4 and 5 are the "…", '…' and (…) title forms; at most one
    // of them participates in a match.
    var title: String? = nil
    for groupIdx in 3...5 {
        let r = m.range(at: groupIdx)
        if r.location != NSNotFound {
            title = nsText.substring(with: r)
            break
        }
    }

    // The pattern puts `[` directly before group 1 and `]:` directly after
    // it, so the marker is the label text widened by one character on the
    // left and two on the right.
    let labelMarkerRange = NSRange(location: labelTextRange.location - 1,
                                   length: labelTextRange.length + 3)

    return LinkRefDefHit(
        reference: LinkReference(label: label, url: url, title: title),
        labelRange: labelMarkerRange,
        urlRange: urlRange
    )
}
}
// diff --git a/Sources/MarkdownEngine/Parser/BlockSpan.swift b/Sources/MarkdownEngine/Parser/BlockSpan.swift
// new file mode 100644
// index 0000000..db86790
// --- /dev/null
// +++ b/Sources/MarkdownEngine/Parser/BlockSpan.swift
// @@ -0,0 +1,121 @@
//
//  BlockSpan.swift
//  MarkdownEngine
//
//  Data model for the block phase of the two-phase Markdown parser
//  (CommonMark §3, Appendix A). A `BlockSpan` is a typed range over the
//  source that the block scanner emits; the inline parser runs over each
//  span's `contentRange` to fill in inline structure.
//
//  Phase-1 spans are flat (children always empty). Phase-2 will populate
//  `children` for container blocks (blockquote, list item, etc.).
//

import Foundation

/// Kind of block-level construct found in the source.
///
/// Cases marked "Phase 2" are forward-declared so adding them later
/// requires no API break in code that switches over `BlockKind`.
enum BlockKind: Equatable {
    // Phase 1
    case paragraph
    case heading(level: Int)             // 1...6; ATX only — BlockScanner deliberately skips Setext
    case fencedCode(language: String?)
    case thematicBreak
    case list(ordered: Bool)
    case listItem(indentColumns: Int)
    case linkReferenceDefinition(label: String)

    // Phase 2 — forward-declared, not emitted by Phase-1 scanner
    case blockquote
    case table
    case tableRow
    case tableCell(alignment: TableCellAlignment)
    case footnoteDefinition(label: String)
    case definitionList
    case htmlBlock
}

enum TableCellAlignment: Equatable {
    case none
    case left
    case center
    case right
}

/// One block-level element in the source.
///
/// - `range`: full source range including any markers / fences.
/// - `contentRange`: substring that the inline phase processes
///   (e.g. text after `# ` for a heading, body between fences for code).
/// - `markerRanges`: ranges of opening/closing markers (e.g. `#` for ATX,
///   the two ``` lines for fenced code). Used by stylers to hide / dim markers.
/// - `children`: nested blocks for container kinds. Always empty in Phase 1.
struct BlockSpan: Equatable {
    let kind: BlockKind
    let range: NSRange
    let contentRange: NSRange
    let markerRanges: [NSRange]
    var children: [BlockSpan]

    init(
        kind: BlockKind,
        range: NSRange,
        contentRange: NSRange,
        markerRanges: [NSRange] = [],
        children: [BlockSpan] = []
    ) {
        self.kind = kind
        self.range = range
        self.contentRange = contentRange
        self.markerRanges = markerRanges
        self.children = children
    }
}

extension BlockKind {
    /// `true` when the inline phase should tokenize this block's `contentRange`.
    /// Fenced code, thematic breaks, link reference definitions, and HTML
    /// blocks suppress inline parsing entirely.
    var allowsInlineContent: Bool {
        // Exhaustive on purpose: a new case must be classified explicitly.
        switch self {
        case .paragraph, .heading, .blockquote, .listItem, .tableCell, .definitionList:
            return true
        case .fencedCode, .thematicBreak, .linkReferenceDefinition, .htmlBlock,
             .list, .table, .tableRow, .footnoteDefinition:
            return false
        }
    }
}

/// A `[label]: url "title"` definition collected during the block phase.
/// Phase 3 (inline AST) will consume the map to resolve reference-style
/// links like `[text][label]` and `![alt][label]`.
struct LinkReference: Equatable {
    /// The label exactly as written in the source.
    let label: String
    let url: String
    let title: String?

    init(label: String, url: String, title: String? = nil) {
        self.label = label
        self.url = url
        self.title = title
    }

    /// Lookup key for this reference: the label split on whitespace and
    /// newlines, empty pieces dropped (which trims both ends and collapses
    /// inner runs), re-joined with single spaces, then lowercased.
    ///
    /// NOTE(review): CommonMark specifies a Unicode case fold for label
    /// matching; `lowercased()` may differ from that for some characters —
    /// confirm whether the difference matters here.
    var normalizedLabel: String {
        let words = label
            .components(separatedBy: .whitespacesAndNewlines)
            .filter { !$0.isEmpty }
        let singleSpaced = words.joined(separator: " ")
        return singleSpaced.lowercased()
    }
}

/// Output of the block phase.
struct BlockScanResult: Equatable {
    /// Top-level blocks.
    let blocks: [BlockSpan]
    /// Link reference definitions, keyed by `normalizedLabel`.
    let linkReferences: [String: LinkReference]
}
// diff --git a/Sources/MarkdownEngine/Parser/BlockVisitor.swift b/Sources/MarkdownEngine/Parser/BlockVisitor.swift
// new file mode 100644
// index 0000000..a3cfe5a
// --- /dev/null
// +++ b/Sources/MarkdownEngine/Parser/BlockVisitor.swift
// @@ -0,0 +1,30 @@
//
//  BlockVisitor.swift
//  MarkdownEngine
//
//  Forward-facing API for renderers / stylers / consumers that need to walk
//  block structure. Phase-1 spans are always flat (children empty), but the
//  default `walk` implementation already recurses so Phase 2's nested blocks
//  (blockquotes, list items, table cells) work without changes to callers.
//
//  Conform to `BlockVisitor` and implement `visit(_:depth:)`; call `walk(_:)`
//  with the top-level block list.
//

import Foundation

protocol BlockVisitor {
    /// Called once per span by `walk`; `depth` is 0 for the spans passed to
    /// `walk` and grows by one per level of `children`.
    mutating func visit(_ span: BlockSpan, depth: Int)
}

extension BlockVisitor {
    /// Traverse `blocks` depth-first, calling `visit` for each span.
+ mutating func walk(_ blocks: [BlockSpan], depth: Int = 0) { + for span in blocks { + visit(span, depth: depth) + if !span.children.isEmpty { + walk(span.children, depth: depth + 1) + } + } + } +} diff --git a/Sources/MarkdownEngine/Parser/MarkdownTokenizer.swift b/Sources/MarkdownEngine/Parser/MarkdownTokenizer.swift index 4f2ed6f..668c9e7 100644 --- a/Sources/MarkdownEngine/Parser/MarkdownTokenizer.swift +++ b/Sources/MarkdownEngine/Parser/MarkdownTokenizer.swift @@ -53,40 +53,77 @@ enum MarkdownTokenizer { var tokens: [MarkdownToken] = [] let nsText = text as NSString let fullRange = NSRange(location: 0, length: nsText.length) + guard nsText.length > 0 else { return [] } - // Emphasis via stack parser. - tokens.append(contentsOf: parseEmphasisTokens(in: text)) + // ---------- Block phase ---------- + let blockResult = BlockScanner.scan(text) - // Image embeds ![[Name]] (must be parsed before wikiLinks) + // Convert block spans into block-kind MarkdownTokens that the styler + // already understands. (Headings, fenced code; thematic breaks and + // link reference definitions don't have legacy MarkdownTokenKind + // counterparts and are tracked only via BlockScanResult for now.) + // + // BlockScanner emits ranges over whole lines (including trailing + // newlines) — the legacy regex-based parser excluded the trailing + // newline from `.heading` / `.codeBlock` token ranges, so we trim it + // here to keep the golden snapshot stable. 
+ for span in blockResult.blocks { + switch span.kind { + case .heading: + tokens.append(MarkdownToken( + kind: .heading, + range: trimTrailingNewline(span.range, in: nsText), + contentRange: span.contentRange, + markerRanges: span.markerRanges + )) + case .fencedCode: + tokens.append(MarkdownToken( + kind: .codeBlock, + range: trimTrailingNewline(span.range, in: nsText), + contentRange: span.contentRange, + markerRanges: span.markerRanges + )) + default: + break + } + } + + // ---------- Inline phase ---------- + var inlineTokens: [MarkdownToken] = [] + + // Emphasis (stack parser, already line-scoped). + inlineTokens.append(contentsOf: parseEmphasisTokens(in: text)) + + // Image embeds ![[...]] (parsed before wiki-links). var imageEmbedRanges: [NSRange] = [] for match in imageEmbedRegex.matches(in: text, options: [], range: fullRange) { let full = match.range(at: 0) let content = match.range(at: 1) - let openMarker = NSRange(location: full.location, length: 3) // ![[ - let closeMarker = NSRange(location: full.location + full.length - 2, length: 2) // ]] - tokens.append(MarkdownToken(kind: .imageEmbed, - range: full, - contentRange: content, - markerRanges: [openMarker, closeMarker])) + let openMarker = NSRange(location: full.location, length: 3) + let closeMarker = NSRange(location: full.location + full.length - 2, length: 2) + inlineTokens.append(MarkdownToken(kind: .imageEmbed, + range: full, + contentRange: content, + markerRanges: [openMarker, closeMarker])) imageEmbedRanges.append(full) } - // Node links [[Name]] + // Wiki-links [[...]] for match in wikiLinkRegex.matches(in: text, options: [], range: fullRange) { let full = match.range(at: 0) - // Skip ranges already claimed by imageEmbed tokens - let overlapsImage = imageEmbedRanges.contains { NSIntersectionRange($0, full).length > 0 } - if overlapsImage { continue } + if imageEmbedRanges.contains(where: { NSIntersectionRange($0, full).length > 0 }) { + continue + } let content = match.range(at: 1) let open 
= NSRange(location: full.location, length: 2) let close = NSRange(location: full.location + full.length - 2, length: 2) - tokens.append(MarkdownToken(kind: .wikiLink, - range: full, - contentRange: content, - markerRanges: [open, close])) + inlineTokens.append(MarkdownToken(kind: .wikiLink, + range: full, + contentRange: content, + markerRanges: [open, close])) } - // Markdown links [Text](URL) + // Markdown links [text](url) for match in markdownLinkRegex.matches(in: text, options: [], range: fullRange) { let full = match.range let textRange = match.range(at: 1) @@ -95,100 +132,114 @@ enum MarkdownTokenizer { let closeBracket = NSRange(location: textRange.location + textRange.length, length: 1) let openParen = NSRange(location: urlRange.location - 1, length: 1) let closeParen = NSRange(location: urlRange.location + urlRange.length, length: 1) - tokens.append(MarkdownToken(kind: .link, - range: full, - contentRange: textRange, - markerRanges: [openBracket, closeBracket, openParen, closeParen])) + inlineTokens.append(MarkdownToken(kind: .link, + range: full, + contentRange: textRange, + markerRanges: [openBracket, closeBracket, openParen, closeParen])) } - // Headings #... 
up to ###### - for match in headingRegex.matches(in: text, options: [], range: fullRange) { - let fullMatchRange = match.range(at: 0) - let hashes = match.range(at: 1) - let content = match.range(at: 2) - let leadingWsLength = hashes.location - fullMatchRange.location - let tokenRange = NSRange(location: hashes.location, length: fullMatchRange.length - leadingWsLength) - var markerRanges = [hashes] - let hashEnd = hashes.location + hashes.length - if hashEnd < nsText.length { - let spaceRange = NSRange(location: hashEnd, length: 1) - if nsText.substring(with: spaceRange) == " " { - markerRanges.append(spaceRange) - } - } - tokens.append(MarkdownToken(kind: .heading, - range: tokenRange, - contentRange: content, - markerRanges: markerRanges)) - } - - // Fenced code blocks ```lang\n...\n``` - for match in codeBlockRegex.matches(in: text, options: [], range: fullRange) { - let full = match.range(at: 0) - let contentRange = match.range(at: 2) - let closingFence = match.range(at: 3) - let tokenEnd = closingFence.location + closingFence.length - let tokenRange = NSRange(location: full.location, length: tokenEnd - full.location) - let openingLength = max(3, min(contentRange.location - tokenRange.location, tokenRange.length)) - let openingMarker = NSRange(location: tokenRange.location, length: openingLength) - _ = contentRange.location + contentRange.length - let closingMarker = closingFence - - tokens.append(MarkdownToken(kind: .codeBlock, - range: tokenRange, - contentRange: contentRange, - markerRanges: [openingMarker, closingMarker])) - } - - // Block LaTeX $$...$$ (multiline) + // Block LaTeX $$...$$ — runs only against ranges outside fenced code. 
for match in blockLatexRegex.matches(in: text, options: [], range: fullRange) { let full = match.range(at: 0) - let inCode = tokens.contains { $0.kind == .codeBlock && NSIntersectionRange($0.range, full).length > 0 } - if inCode { continue } - + if isInsideFencedCode(range: full, blocks: blockResult.blocks) { continue } let content = match.range(at: 1) let openMarker = NSRange(location: full.location, length: 2) let closeMarker = NSRange(location: full.location + full.length - 2, length: 2) - tokens.append(MarkdownToken(kind: .blockLatex, - range: full, - contentRange: content, - markerRanges: [openMarker, closeMarker])) + inlineTokens.append(MarkdownToken(kind: .blockLatex, + range: full, + contentRange: content, + markerRanges: [openMarker, closeMarker])) } - // Inline code `code` + // Inline code `…` for match in inlineCodeRegex.matches(in: text, options: [], range: fullRange) { let full = match.range(at: 0) let content = match.range(at: 1) let openBacktick = NSRange(location: full.location, length: 1) let closeBacktick = NSRange(location: full.location + full.length - 1, length: 1) - tokens.append(MarkdownToken(kind: .inlineCode, - range: full, - contentRange: content, - markerRanges: [openBacktick, closeBacktick])) + inlineTokens.append(MarkdownToken(kind: .inlineCode, + range: full, + contentRange: content, + markerRanges: [openBacktick, closeBacktick])) } - // Inline LaTeX $formula$ + // Inline LaTeX $…$ for match in inlineLatexRegex.matches(in: text, options: [], range: fullRange) { let full = match.range(at: 0) let content = match.range(at: 1) - let isInsideBlock = tokens.contains { - ($0.kind == .codeBlock || $0.kind == .blockLatex) && - NSIntersectionRange($0.range, full).length > 0 - } - if isInsideBlock { continue } + if isInsideFencedCode(range: full, blocks: blockResult.blocks) { continue } + if isInsideBlockLatexInline(range: full, inlineTokens: inlineTokens) { continue } let contentString = nsText.substring(with: content) if 
!isInlineMathContent(contentString) { continue } let openDollar = NSRange(location: full.location, length: 1) let closeDollar = NSRange(location: full.location + full.length - 1, length: 1) - tokens.append(MarkdownToken(kind: .inlineLatex, - range: full, - contentRange: content, - markerRanges: [openDollar, closeDollar])) + inlineTokens.append(MarkdownToken(kind: .inlineLatex, + range: full, + contentRange: content, + markerRanges: [openDollar, closeDollar])) + } + + // ---------- Block-precedence filter ---------- + let allowedInline = inlineContainerRanges(from: blockResult.blocks) + for t in inlineTokens { + if rangeIsInside(t.range, anyOf: allowedInline) { + tokens.append(t) + } } return tokens } + // MARK: - Helpers used by parseTokens + + /// Content ranges of all blocks that allow inline tokenization. + private static func inlineContainerRanges(from blocks: [BlockSpan]) -> [NSRange] { + blocks.compactMap { $0.kind.allowsInlineContent ? $0.contentRange : nil } + } + + /// True when `range` is fully contained in any one of the allowed ranges. + private static func rangeIsInside(_ range: NSRange, anyOf allowed: [NSRange]) -> Bool { + if allowed.isEmpty { return false } + let end = NSMaxRange(range) + for a in allowed { + if range.location >= a.location && end <= NSMaxRange(a) { + return true + } + } + return false + } + + private static func isInsideFencedCode(range: NSRange, blocks: [BlockSpan]) -> Bool { + for b in blocks { + if case .fencedCode = b.kind, NSIntersectionRange(b.range, range).length > 0 { + return true + } + } + return false + } + + private static func isInsideBlockLatexInline(range: NSRange, inlineTokens: [MarkdownToken]) -> Bool { + for t in inlineTokens where t.kind == .blockLatex { + if NSIntersectionRange(t.range, range).length > 0 { return true } + } + return false + } + + /// Trim a single trailing CR, LF, or CRLF from `range` (relative to `nsText`). 
private static func trimTrailingNewline(_ range: NSRange, in nsText: NSString) -> NSRange {
    let end = NSMaxRange(range)

    // Work out how many characters the trailing terminator occupies:
    // 2 for CRLF, 1 for a lone LF or CR, 0 when the range has none.
    let terminatorLength: Int
    if range.length >= 2,
       nsText.character(at: end - 2) == 0x0D,
       nsText.character(at: end - 1) == 0x0A {
        terminatorLength = 2
    } else if range.length >= 1 {
        let finalChar = nsText.character(at: end - 1)
        terminatorLength = (finalChar == 0x0A || finalChar == 0x0D) ? 1 : 0
    } else {
        terminatorLength = 0
    }

    return NSRange(location: range.location, length: range.length - terminatorLength)
}

// MARK: - Code Block Helpers

static func extractLanguage(from token: MarkdownToken, in text: String) -> String? {