"""GFM (GitHub Flavored Markdown) table extraction.

Post-patch content of ``src/markdown_table.py`` from the change
"feat(markdown): implement GFM table parser".
"""
import re
from dataclasses import dataclass

# Delimiter row of a GFM table: one or more cells made only of hyphens with an
# optional leading/trailing colon (alignment), separated by pipes.  The
# lookahead demands at least one pipe so a bare ``---`` line (thematic break /
# setext underline) is never mistaken for a single-column delimiter row.
_TABLE_SEPARATOR = re.compile(
    r"^\s*(?=.*\|)\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?\s*$"
)

# Line prefixes that open or close a fenced code block.
_FENCE_MARKERS = ("```", "~~~")


@dataclass(frozen=True)
class TableBlock:
    """Frozen GFM table block.
    [C: src/markdown_helper.py:MarkdownRenderer.render]

    Attributes:
        headers: Stripped text of each header cell.
        rows: One list of stripped cell texts per body row.  Rows are kept
            as written; they are not padded or truncated to the header width.
        span: Half-open ``(start, end)`` range of line indices (into
            ``text.splitlines()``) covered by the table, header included.
    """
    headers: list[str]
    rows: list[list[str]]
    span: tuple[int, int]


def _split_row(line: str) -> list[str]:
    """Split one table row into stripped cell strings.

    A pipe preceded by a backslash (``\\|``) is cell content and is returned
    as a literal ``|``; any other backslash pair is kept verbatim.  One
    optional leading and one optional trailing delimiter pipe are ignored.
    """
    line = line.strip()
    cells: list[str] = []
    current: list[str] = []
    ends_with_delimiter = False
    pos = 0
    while pos < len(line):
        ch = line[pos]
        if ch == "\\" and pos + 1 < len(line):
            # Consume the escape pair as a unit so an escaped pipe never
            # acts as a cell boundary.
            nxt = line[pos + 1]
            current.append("|" if nxt == "|" else ch + nxt)
            ends_with_delimiter = False
            pos += 2
            continue
        if ch == "|":
            cells.append("".join(current))
            current = []
            ends_with_delimiter = True
        else:
            current.append(ch)
            ends_with_delimiter = False
        pos += 1
    cells.append("".join(current))

    # Drop the empty pseudo-cells produced by the outer delimiter pipes.
    if line.startswith("|"):
        cells = cells[1:]
    if ends_with_delimiter and len(cells) > 1:
        cells = cells[:-1]
    return [cell.strip() for cell in cells]


def _is_table_at(lines: list[str], i: int) -> bool:
    """Return True when ``lines[i]`` is a header row starting a table.

    That requires a pipe in the header line, a delimiter row on the next
    line, and the same number of cells in both (GFM does not recognise a
    table whose header and delimiter rows disagree).
    """
    if i + 1 >= len(lines):
        return False
    header, delimiter = lines[i], lines[i + 1]
    if "|" not in header or not _TABLE_SEPARATOR.match(delimiter):
        return False
    return len(_split_row(header)) == len(_split_row(delimiter))


def parse_tables(text: str) -> list[TableBlock]:
    """Extract every GFM table found in *text*.

    Lines inside fenced code blocks (opened by a line starting with three
    backticks or three tildes, closed by a line starting with the same
    marker) are skipped.  A table's body extends over the consecutive lines
    after the delimiter row that contain a pipe.

    Args:
        text: Markdown source.

    Returns:
        The tables in document order; an empty list when there are none.
    """
    lines = text.splitlines()
    blocks: list[TableBlock] = []
    open_fence = None  # marker of the fence we are inside, if any
    i = 0
    while i < len(lines):
        stripped = lines[i].strip()
        marker = next(
            (m for m in _FENCE_MARKERS if stripped.startswith(m)), None
        )
        if open_fence is not None:
            # Only the marker that opened the fence can close it; the other
            # marker is ordinary code content.
            if marker == open_fence:
                open_fence = None
            i += 1
            continue
        if marker is not None:
            open_fence = marker
            i += 1
            continue
        if _is_table_at(lines, i):
            headers = _split_row(lines[i])
            j = i + 2
            rows: list[list[str]] = []
            # Delimiter-looking lines in the body (e.g. "| - | - |") are
            # plain data rows, so they must not end the table.
            while j < len(lines) and "|" in lines[j]:
                rows.append(_split_row(lines[j]))
                j += 1
            blocks.append(TableBlock(headers=headers, rows=rows, span=(i, j)))
            i = j
            continue
        i += 1
    return blocks