knowledgestack · arnav2 · May 21, 2026 · May 20, 2026
diff --git a/src/ks_xlsx_parser/rendering/text_renderer.py b/src/ks_xlsx_parser/rendering/text_renderer.py
@@ -13,7 +13,7 @@
 
 from ks_xlsx_parser.models.block import BlockDTO
 from ks_xlsx_parser.models.chart import ChartDTO
-from ks_xlsx_parser.models.common import BlockType, col_number_to_letter
+from ks_xlsx_parser.models.common import BlockType, CellCoord, col_number_to_letter
 from ks_xlsx_parser.models.sheet import SheetDTO
 
 logger = logging.getLogger(__name__)
@@ -57,9 +57,15 @@ def _format_number_for_retrieval(raw: int | float) -> str:
 def _cell_render_value(cell) -> str:
     """Pick the string form of `cell` that's best for RAG retrieval.
 
-    For *numeric* cells we ignore the display-formatted string and emit
-    the raw value verbatim — Excel's commas, percent signs, trailing
-    zeros, and currency symbols all defeat substring search.
+    For *numeric* cells we emit the raw value verbatim — Excel's
+    commas, percent signs, trailing zeros, and currency symbols all
+    defeat substring search. **When the cell carries a number-format
+    that meaningfully changes the displayed string (e.g. 0.06 → "6%",
+    1272 → "1,272.00", 46022 → "2025-12-31"), we additionally append
+    the formatted form in `[brackets]`** so substring match hits
+    either the raw or the displayed shape — the question may quote
+    either, and `answer.xlsx` may use the display form even though
+    `input.xlsx` keeps the raw.
 
     For dates we emit ISO ``YYYY-MM-DD`` (no time component) which is
     both human-readable and matches the date format that openpyxl /
@@ -79,7 +85,15 @@ def _cell_render_value(cell) -> str:
         return raw.isoformat()
 
     if isinstance(raw, (int, float)) and not isinstance(raw, bool):
-        return _format_number_for_retrieval(raw)
+        raw_str = _format_number_for_retrieval(raw)
+        # If a meaningful number-format produced a different display
+        # string, emit both forms. Skip when the displayed form is
+        # identical to the raw (no information added) or trivially
+        # convertible (just trailing zeros), to keep render_text terse.
+        disp = (cell.display_value or "").strip()
+        if disp and disp != raw_str and not _is_trivial_format_diff(raw_str, disp):
+            return f"{raw_str} [{disp}]"
+        return raw_str
 
     if cell.display_value is not None:
         return str(cell.display_value)
@@ -88,6 +102,28 @@ def _cell_render_value(cell) -> str:
     return ""
 
 
+def _is_trivial_format_diff(raw_str: str, display_str: str) -> bool:
+    """Return True if `display_str` adds no retrieval value over `raw_str`.
+
+    Trivial: the strings are equal, or differ only by insignificant
+    trailing zeros after a decimal point (``"1272"`` → ``"1272.00"``,
+    ``"1.5"`` → ``"1.50"``).
+
+    NOT trivial: ``"1272"`` → ``"1,272.00"`` (thousands separator), or
+    ``"0.06"`` → ``"6%"``, or ``"1272"`` → ``"$1,272"``. Each of these
+    surfaces a distinct token a user might quote.
+    """
+    def _canonical(text: str) -> str:
+        # Only a purely numeric fraction is trimmed, so "100", "6.50%" and
+        # "1.5e+20" are left untouched; a dangling "." is dropped afterwards.
+        head, _, frac = text.partition(".")
+        if not frac.isdigit():
+            return text
+        return f"{head}.{frac.rstrip('0')}".rstrip(".")
+
+    return _canonical(raw_str) == _canonical(display_str)
+
+
 class TextRenderer:
     """
     Renders blocks as plain text with coordinate context.
@@ -105,11 +141,17 @@ def render_block(self, block: BlockDTO) -> str:
 
         Format:
             [Sheet1!A1:D10] (table: "SalesData")
-            | A        | B       | C      | D       |
-            |----------|---------|--------|---------|
-            | Product  | Q1      | Q2     | Q3      |
-            | Widget A | 100     | 150    | 200     |
+                  | A        | B       | C      | D       |
+                  |----------|---------|--------|---------|
+            r1    | Product  | Q1      | Q2     | Q3      |
+            r2    | Widget A | 100     | 150    | 200     |
             ...
+
+        Per-row `r<N>` prefix carries the sheet row number so a
+        downstream LLM consumer can compute cell coordinates
+        deterministically (block header gives the A1 range; per-row
+        anchors close the gap to (row, col)). The prefix width is
+        sized to the largest row number in the block.
         """
         rng = block.cell_range
         rows = range(rng.top_left.row, rng.bottom_right.row + 1)
@@ -124,35 +166,73 @@ def render_block(self, block: BlockDTO) -> str:
             header += f' table: "{block.table_name}"'
         lines.append(header)
 
+        # Row-anchor width — `r<N>` plus padding. Sized once per block
+        # so all rows align under a constant-width column.
+        row_anchor_width = max(len(f"r{r}") for r in rows)
+        row_anchor_pad = " " * row_anchor_width  # blank slot for header / separator
+
+        # Build slave→master lookup for merged regions on this sheet.
+        # Slave cells (everything in a merged range except the master)
+        # render the master's value with a `←` propagation marker so
+        # the chunk's text contains the visible value at every position
+        # it appears in Excel, not just the top-left of the region.
+        merged_master: dict[tuple[int, int], CellCoord] = {}
+        for region in self._sheet.merged_regions:
+            mr = region.range
+            master = region.master
+            for r in range(mr.top_left.row, mr.bottom_right.row + 1):
+                for c in range(mr.top_left.col, mr.bottom_right.col + 1):
+                    if r == master.row and c == master.col:
+                        continue
+                    merged_master[(r, c)] = master
+
+        def _value_for(row: int, col: int) -> tuple[str, bool]:
+            """Render one grid position; the flag marks a merged-master copy."""
+            own = self._sheet.get_cell(row, col)
+            is_slave = own is None or bool(own.is_merged_slave)
+            # Slave / missing positions borrow the merged region's master cell.
+            lead = merged_master.get((row, col)) if is_slave else None
+            if is_slave and lead is None:
+                return "", False
+            source = self._sheet.get_cell(lead.row, lead.col) if is_slave else own
+            if source is None:
+                return "", False
+            text = _cell_render_value(source)
+            # Flag formula cells unless the rendered text is already a formula.
+            if source.formula and not text.startswith("="):
+                text = f"{text} [=]"
+            if is_slave:
+                # `←` tells the reader this value was propagated from the master.
+                text = f"← {text}"
+            return _flatten_cell_text(text), is_slave
+
         # Compute column widths using the SAME rendering rules the data
-        # rows will use, including the trailing `[=]` formula marker.
-        # Otherwise `[=]` inflates a cell past col_width post-hoc and
-        # spuriously triggers the long-value fallback below.
+        # rows will use, including the trailing `[=]` formula marker
+        # AND the merged-cell `←` propagation marker. Otherwise these
+        # inflate a cell past col_width post-hoc and spuriously trigger
+        # the long-value fallback below.
         col_widths: dict[int, int] = {}
         for col in cols:
             col_letter = col_number_to_letter(col)
             max_width = len(col_letter)
             for row in rows:
-                cell = self._sheet.get_cell(row, col)
-                if cell is None:
-                    continue
-                val = _cell_render_value(cell)
-                if cell.formula and not val.startswith("="):
-                    val = f"{val} [=]"
-                val = _flatten_cell_text(val)
+                val, _ = _value_for(row, col)
                 max_width = max(max_width, len(val))
             col_widths[col] = min(max_width, 30)  # Cap at 30 for alignment; text may overflow
 
-        # Column header row
+        # Column header row — leading blank slot matches the row-anchor width.
         col_headers = []
         for col in cols:
             if col in self._sheet.hidden_cols:
                 continue
             letter = col_number_to_letter(col)
             col_headers.append(letter.ljust(col_widths[col]))
-        lines.append("| " + " | ".join(col_headers) + " |")
+        lines.append(row_anchor_pad + " | " + " | ".join(col_headers) + " |")
         lines.append(
-            "|-" + "-|-".join("-" * col_widths[c] for c in cols if c not in self._sheet.hidden_cols) + "-|"
+            row_anchor_pad
+            + " |-"
+            + "-|-".join("-" * col_widths[c] for c in cols if c not in self._sheet.hidden_cols)
+            + "-|"
         )
 
         # Data rows
@@ -161,28 +241,21 @@ def render_block(self, block: BlockDTO) -> str:
             if row in self._sheet.hidden_rows:
                 continue
 
+            anchor = f"r{row}".ljust(row_anchor_width)
+
             values = []
             for col in cols:
                 if col in self._sheet.hidden_cols:
                     continue
-                cell = self._sheet.get_cell(row, col)
-                val = _cell_render_value(cell) if cell else ""
-
-                if cell and cell.formula and not val.startswith("="):
-                    val = f"{val} [=]"
-
-                # Markdown table rows are single-line; collapse embedded newlines
-                # (common in headers like "租金\n天数") so they don't break the grid.
-                val = _flatten_cell_text(val)
-
+                val, _ = _value_for(row, col)
                 # Long-value fallback: only triggers if the rendered string
                 # genuinely exceeds the (now consistently-computed) column
                 # width — i.e. the column was capped at 30. We still emit
                 # the full retrieval value (no truncation) and let the
                 # alignment overflow; truncating destroys retrievability.
                 values.append(val.ljust(col_widths[col]))
 
-            line = "| " + " | ".join(values) + " |"
+            line = anchor + " | " + " | ".join(values) + " |"
             lines.append(line)
 
             # Add separator after first row if it looks like a header
@@ -191,7 +264,8 @@ def render_block(self, block: BlockDTO) -> str:
                 BlockType.ASSUMPTIONS_TABLE,
             ):
                 lines.append(
-                    "|-"
+                    row_anchor_pad
+                    + " |-"
                     + "-|-".join(
                         "-" * col_widths[c]
                         for c in cols