diff --git a/.flake8 b/.flake8 deleted file mode 100644 index ea058021..00000000 --- a/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 88 -max-complexity = 8 \ No newline at end of file diff --git a/.github/workflows/auto-merge-dependabot.yml b/.github/workflows/auto-merge-dependabot.yml index 6c73eb04..93779f1c 100644 --- a/.github/workflows/auto-merge-dependabot.yml +++ b/.github/workflows/auto-merge-dependabot.yml @@ -23,9 +23,7 @@ jobs: if: "${{ steps.metadata.outputs.update-type == 'version-update:semver-minor' || steps.metadata.outputs.update-type == - 'version-update:semver-patch' || - steps.metadata.outputs.dependency-names == - 'black' }}" + 'version-update:semver-patch' }}" # https://cli.github.com/manual/gh_pr_merge run: gh pr merge --auto --squash "$PR_URL" diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml deleted file mode 100644 index 2e7dab02..00000000 --- a/.github/workflows/black.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Code formatting - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: 3.x - - - name: Install black - run: | - # black = "^21.7b0" - export BLACK_VERSION=$(grep black pyproject.toml | egrep -o '\^[0-9a-z.]+' | sed 's/\^//g') - - set -x - pip install black==${BLACK_VERSION} - - # https://pypi.org/project/black/ - - name: Check code formatting - run: | - black --check . 
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..6f6a09a2 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,30 @@ +name: Lint + +on: + push: + branches: [ master ] + pull_request: + +jobs: + lint: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install Poetry + uses: snok/install-poetry@v1.4.1 + with: + version: latest + virtualenvs-create: true + virtualenvs-in-project: true + + - name: Install dependencies with poetry + run: poetry install --no-root + + - name: Lint with ruff + run: make lint diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 8d3532b1..95990755 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -42,9 +42,6 @@ jobs: - name: Install Python wheel support to speed up things run: pip install wheel - - name: Pre-install black - run: pip install black - # https://github.com/marketplace/actions/install-poetry-action - name: Install Poetry uses: snok/install-poetry@v1.4.1 @@ -77,8 +74,5 @@ jobs: pip install coveralls poetry run coveralls --service=github - - name: Lint with pylint - run: make lint - - name: Build a distribution package run: poetry build -vvv diff --git a/.github/workflows/type-check.yml b/.github/workflows/type-check.yml new file mode 100644 index 00000000..6d3383de --- /dev/null +++ b/.github/workflows/type-check.yml @@ -0,0 +1,30 @@ +name: Type Check + +on: + push: + branches: [ master ] + pull_request: + +jobs: + type-check: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install Poetry + uses: snok/install-poetry@v1.4.1 + with: + version: latest + virtualenvs-create: true + virtualenvs-in-project: true + + - name: Install dependencies with poetry + run: poetry install 
--no-root + + - name: Type check with mypy + run: make type_check diff --git a/AGENTS.md b/AGENTS.md index 0abed2d6..82922557 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,31 +13,78 @@ This file contains important information about the sql-metadata repository for A **Technology Stack:** - Python 3.10+ -- sqlparse library for tokenization +- sqlglot library for SQL parsing and AST construction +- sqlparse used only for legacy tokenization fallback - Poetry for dependency management - pytest for testing -- flake8 and pylint for linting +- ruff for linting and formatting ## Repository Structure ``` sql-metadata/ -├── sql_metadata/ # Main package -│ ├── parser.py # Core Parser class -│ ├── token.py # SQLToken and EmptyToken classes -│ ├── keywords_lists.py # SQL keyword definitions -│ └── __init__.py -├── test/ # Test suite +├── sql_metadata/ # Main package +│ ├── parser.py # Public facade — Parser class +│ ├── ast_parser.py # ASTParser — thin orchestrator, composes SqlCleaner + DialectParser +│ ├── sql_cleaner.py # SqlCleaner — raw SQL preprocessing (no sqlglot dependency) +│ ├── dialect_parser.py # DialectParser — dialect detection, parsing, quality validation +│ ├── column_extractor.py # ColumnExtractor — single-pass DFS column/alias extraction +│ ├── table_extractor.py # TableExtractor — table extraction with position sorting +│ ├── nested_resolver.py # NestedResolver — CTE/subquery names, bodies, resolution +│ ├── query_type_extractor.py # QueryTypeExtractor — query type detection +│ ├── comments.py # Comment extraction/stripping (pure functions) +│ ├── keywords_lists.py # QueryType/TokenType enums, keyword sets +│ ├── utils.py # UniqueList, flatten_list, shared helpers +│ ├── generalizator.py # Query anonymisation +│ └── __init__.py # Exports: Parser, QueryType +├── test/ # Test suite (25 test files) │ ├── test_with_statements.py │ ├── test_getting_tables.py │ ├── test_getting_columns.py -│ └── ... 
(30+ test files) -├── pyproject.toml # Poetry configuration -├── Makefile # Common commands -├── .flake8 # Flake8 configuration +│ └── ... +├── ARCHITECTURE.md # Detailed architecture docs with Mermaid diagrams +├── pyproject.toml # Poetry configuration +├── Makefile # Common commands └── README.md ``` +## Architecture Overview + +The v3 architecture uses sqlglot to build an AST, then walks it with specialised extractor classes composed by a thin `Parser` facade. See [ARCHITECTURE.md](ARCHITECTURE.md) for detailed module deep dives, traced walkthroughs, and Mermaid diagrams. + +### Pipeline + +``` +Raw SQL → SqlCleaner (preprocessing) + → DialectParser (dialect detection, sqlglot.parse()) + → sqlglot AST (cached by ASTParser) + → TableExtractor (tables, table aliases) + → ColumnExtractor (columns, column aliases — single-pass DFS) + → NestedResolver (CTE/subquery names + bodies, column resolution) + → Final metadata (cached on Parser) +``` + +### Key Design Patterns + +- **Composition over inheritance** — `Parser` composes `ASTParser`, `TableExtractor`, `ColumnExtractor`, `NestedResolver`, `QueryTypeExtractor` +- **Lazy evaluation with caching** — properties compute on first access, cache the result +- **Single-pass DFS** — `ColumnExtractor` walks AST in `arg_types` key order (mirrors SQL text order) +- **Multi-dialect retry** — `ASTParser` tries several sqlglot dialects, picks first non-degraded result +- **Graceful regex fallbacks** — degrades to regex when sqlglot parse fails + +### Class Responsibilities + +| Class | Owns | Does NOT own | +|-------|------|-------------| +| `Parser` | Facade, caching, regex fallbacks, value extraction | No extraction logic | +| `ASTParser` | Orchestration, lazy AST caching | No preprocessing, no parsing | +| `SqlCleaner` | Raw SQL preprocessing (REPLACE rewrite, comment strip, CTE normalisation) | No AST, no sqlglot | +| `DialectParser` | Dialect detection, sqlglot parsing, parse-quality validation | No preprocessing | +| 
`ColumnExtractor` | Column names, column aliases (during DFS walk) | CTE/subquery name extraction (standalone) | +| `TableExtractor` | Table names, table aliases, position sorting | Nothing else | +| `NestedResolver` | CTE/subquery names, CTE/subquery bodies, column resolution | Column extraction | +| `QueryTypeExtractor` | Query type detection | Nothing else | + ## Development Workflow ### Setup @@ -55,107 +102,45 @@ poetry run pytest test/test_with_statements.py::test_name # Run specific test ### Linting ```bash -make lint # Run flake8 and pylint -poetry run flake8 sql_metadata/ -poetry run pylint sql_metadata/ +make lint # Run ruff check with auto-fix +poetry run ruff check --fix sql_metadata ``` ### Code Formatting ```bash -make format # Run black formatter +make format # Run ruff formatter +poetry run ruff format . +``` + +### Type Checking +```bash +poetry run mypy sql_metadata ``` ### Coverage ```bash make coverage # Run tests with coverage report +poetry run pytest -vv --cov=sql_metadata --cov-report=term-missing ``` **Important:** The project has a 100% test coverage requirement (`fail_under = 100` in pyproject.toml). 
+### Verification after changes +After making code changes, always run all three checks: +```bash +poetry run pytest -vv --cov=sql_metadata --cov-report=term-missing # tests + coverage +poetry run mypy sql_metadata # type checking +poetry run ruff check sql_metadata # linting +``` + ## Code Quality Standards -### Flake8 Configuration (.flake8) -- Max line length: Not explicitly set (defaults apply) +### Ruff Configuration (pyproject.toml) +- Max line length: 88 - Max complexity: 8 (C901 error for complexity > 8) +- Enabled rule sets: E, F, W (pycodestyle/pyflakes), C90 (mccabe), I (isort) - Exceptions: Use `# noqa: C901` for complex but necessary functions -### Complexity Suppression Pattern -When a function legitimately needs higher complexity, suppress the warning: -```python -@property -def complex_method(self) -> Type: # noqa: C901 - """Method with necessary complexity""" -``` - -Examples in codebase: -- `parser.py:134`: `tokens` property -- `parser.py:450`: `with_names` property -- `parser.py:822`: `_resolve_nested_query` method - -### Pylint -The Parser class has `# pylint: disable=R0902` to suppress "too many instance attributes" warnings. - -## Parser Architecture - -### Core Class: `Parser` -Located in `sql_metadata/parser.py` - -The Parser class uses sqlparse to tokenize SQL and then processes tokens to extract metadata. - -**Key Properties (lazy evaluation):** -- `tokens` - Tokenized SQL -- `tables` - Tables referenced in query -- `columns` - Columns referenced -- `with_names` - CTE (Common Table Expression) names -- `with_queries` - CTE definitions -- `query_type` - Type of SQL query -- `subqueries` - Subquery definitions - -**Important Pattern:** Most properties cache their results: -```python -@property -def example(self): - if self._example is not None: - return self._example - # ... computation ... 
- self._example = result - return self._example -``` - -### Token Processing - -The parser processes `SQLToken` objects which have properties like: -- `value` - The token text -- `normalized` - Uppercased token value -- `next_token` - Next token in sequence -- `previous_token` - Previous token -- `next_token_not_comment` - Next non-comment token -- `is_as_keyword` - Boolean flag -- `is_with_query_end` - Boolean flag for WITH clause boundaries -- `token_type` - Type classification - -### WITH Statement Parsing - -Located in `parser.py:450` (`with_names` property) - -**Key Logic:** -1. Iterates through tokens looking for "WITH" keywords -2. Enters a while loop that stays in WITH block until finding ending keywords -3. Processes each CTE by finding "AS" keywords and extracting names -4. Advances through tokens until finding `is_with_query_end` -5. Checks if at end of WITH block using `WITH_ENDING_KEYWORDS` - -**WITH_ENDING_KEYWORDS** (from `keywords_lists.py`): -- UPDATE -- SELECT -- DELETE -- REPLACE -- INSERT - -**Common Pitfall:** Malformed SQL with consecutive AS keywords (e.g., `WITH a AS (...) AS b`) can cause infinite loops if not properly detected and handled. - -**Solution Pattern:** After processing a WITH clause, always check if the next token is another AS keyword (which indicates malformed SQL) and raise `ValueError("This query is wrong")`. - ## Error Handling Patterns ### Malformed SQL Detection @@ -163,7 +148,7 @@ Located in `parser.py:450` (`with_names` property) The codebase has established patterns for handling malformed SQL: 1. **Detect the malformed pattern early** -2. **Raise `ValueError("This query is wrong")`** - This is the standard error message +2. **Raise `ValueError("This query is wrong")`** — This is the standard error message 3. 
**Use pytest.raises in tests:** ```python parser = Parser(malformed_query) @@ -171,39 +156,14 @@ with pytest.raises(ValueError, match="This query is wrong"): parser.tables ``` -Examples: -- `test_with_statements.py:500-528`: Tests for malformed WITH queries -- `parser.py:679`: Detection in `_handle_with_name_save` - -### Infinite Loop Prevention - -When processing tokens in loops: -1. Always ensure the token advances in each iteration -2. Check for malformed patterns before looping back -3. Have clear exit conditions - -Pattern: -```python -while condition and token.next_token: - if some_pattern: - # ... process ... - if exit_condition: - break - else: - # Always advance token to prevent infinite loop - token = token.next_token - else: - token = token.next_token -``` - ## Testing Patterns ### Test Organization Tests are organized by feature/SQL clause: -- `test_with_statements.py` - WITH clause (CTEs) -- `test_getting_tables.py` - Table extraction -- `test_getting_columns.py` - Column extraction -- `test_query_type.py` - Query type detection +- `test_with_statements.py` — WITH clause (CTEs) +- `test_getting_tables.py` — Table extraction +- `test_getting_columns.py` — Column extraction +- `test_query_type.py` — Query type detection - Database-specific: `test_mssql_server.py`, `test_postgress.py`, `test_hive.py`, etc. ### Test Naming Convention @@ -231,134 +191,47 @@ def test_malformed_case(): - Every bug fix needs a test that would have caught the bug - Coverage must remain at 100% +### Test Comments +Reference issues in test comments: +```python +def test_issue_fix(): + # Test for issue #556 - malformed WITH query causes infinite loop + # https://github.com/macbre/sql-metadata/issues/556 +``` + ## Git Workflow ### Commit Message Format Following the established pattern: ``` -Brief description of change +Brief description of change Resolves #issue-number. -More detailed explanation of what was wrong and why. 
- -The issue was: [explain the problem] - -This fix: -- Bullet point 1 -- Bullet point 2 -- Bullet point 3 - Co-Authored-By: Claude ``` ### Branch Naming - Feature: `feature/description` - Bug fix: `fix/description` -- Example: `fix/parser-tables-hangs` - -### Recent Commits (as of 2026-03-04) -``` -1fbfee4 Drop Python 3.9 support (#604) -d0e6fc6 Parser.columns drops column named 'source' when it is the last column in a SELECT statement (#603) -``` - -## Common Issues and Solutions - -### Issue: Parser Hangs/Infinite Loop - -**Symptoms:** Parser never returns when calling `.tables` or other properties - -**Common Causes:** -1. Token not advancing in a while loop -2. Malformed SQL not detected early enough -3. Missing exit condition in nested loops - -**Solution Checklist:** -- [ ] Ensure token advances in all loop branches -- [ ] Check for malformed SQL patterns and raise ValueError -- [ ] Verify exit conditions are reachable -- [ ] Add timeout test to verify fix - -### Issue: Flake8 Complexity Warning (C901) - -**When it happens:** Function exceeds complexity threshold of 8 - -**Solutions:** -1. Refactor to reduce complexity (preferred) -2. Use `# noqa: C901` if complexity is necessary (see examples in codebase) - -### Issue: Tests Pass Locally but Coverage Fails - -**Cause:** Missing test coverage for new code paths - -**Solution:** -```bash -poetry run pytest -vv --cov=sql_metadata --cov-report=term-missing -``` -This shows which lines are not covered. 
- -## Important Files - -### `sql_metadata/parser.py` -- **Lines 134-200:** Token processing and initialization -- **Lines 450-482:** WITH clause parsing (with_names property) -- **Lines 484-580:** WITH queries extraction -- **Lines 669-700:** `_handle_with_name_save` helper method -- **Lines 822+:** Nested query resolution - -### `sql_metadata/keywords_lists.py` -Defines SQL keyword sets: -- `WITH_ENDING_KEYWORDS` (line 40) -- `SUBQUERY_PRECEDING_KEYWORDS` -- `TABLE_ADJUSTMENT_KEYWORDS` -- `KEYWORDS_BEFORE_COLUMNS` -- `SUPPORTED_QUERY_TYPES` - -### `test/test_with_statements.py` -Comprehensive tests for WITH clause parsing: -- Valid multi-CTE queries -- CTEs with column definitions -- Nested WITH statements -- Malformed SQL detection (lines 500-540) - -## Debugging Tips - -### Running Single Test with Timeout -```bash -timeout 5 poetry run pytest test/test_file.py::test_name -vv -``` - -### Testing Infinite Loop Fix -```bash -timeout 3 poetry run python -c "from sql_metadata import Parser; Parser(query).tables" -``` -If it times out, there's still an infinite loop. 
- -### Inspecting Token Flow -Add debug prints in parser.py: -```python -print(f"Token: {token.value}, Next: {token.next_token.value if token.next_token else None}") -``` ## Dependencies ### Production -- **sqlparse** (>=0.4.1, <0.6.0): SQL tokenization +- **sqlglot** (^30.0.3): SQL parsing and AST construction +- **sqlparse** (>=0.4.1, <0.6.0): Legacy tokenization ### Development -- **pytest** (^8.4.2): Testing framework -- **pytest-cov** (^7.0.0): Coverage reporting -- **black** (^25.11): Code formatting -- **flake8** (^7.3.0): Linting -- **pylint** (^3.3.9): Advanced linting -- **coverage** (^7.10): Coverage measurement +- **pytest** (^9.0.2): Testing framework +- **pytest-cov** (^7.1.0): Coverage reporting +- **ruff** (^0.11): Linting and formatting +- **coverage** (^7.13): Coverage measurement ## Version Information -- **Current Version:** 2.19.0 -- **Python Support:** ^3.10 (Python 3.9 support dropped in #604) +- **Current Version:** 2.20.0 +- **Python Support:** ^3.10 - **License:** MIT - **Homepage:** https://github.com/macbre/sql-metadata @@ -375,32 +248,14 @@ def my_property(self): return self._my_property ``` -### 2. Token Advancement Safety -In loops, ensure every branch advances: -```python -while condition: - if pattern_match: - # ... process ... - if should_exit: - flag = False - else: - token = token.next_token # MUST advance - else: - token = token.next_token # MUST advance -``` - -### 3. Error Messages +### 2. Error Messages Use consistent error messages: -- `"This query is wrong"` - for malformed SQL +- `"This query is wrong"` — for malformed SQL +- `"Empty queries are not supported!"` — for empty input - Keep messages simple and consistent with existing patterns -### 4. Test Comments -Reference issues in test comments: -```python -def test_issue_fix(): - # Test for issue #556 - malformed WITH query causes infinite loop - # https://github.com/macbre/sql-metadata/issues/556 -``` +### 3. 
Prefer sqlglot over manual parsing +Always use sqlglot AST features (node types, `find_all`, `arg_types` traversal) rather than regex or manual string parsing when possible. ## Quick Reference Commands @@ -423,17 +278,20 @@ make coverage # Coverage report poetry run python -c "from sql_metadata import Parser; print(Parser('SELECT * FROM t').tables)" ``` -## Notes for Future Work +## Debugging Tips -### Potential Improvements -1. Consider refactoring `with_names` property to reduce complexity below 8 -2. Add more detailed error messages for different types of malformed SQL -3. Consider extracting token advancement logic into helper methods +### Inspecting the AST +```python +from sql_metadata import Parser +p = Parser("SELECT a FROM t") +print(p._ast_parser.ast) # sqlglot AST tree +print(repr(p._ast_parser.ast)) # Detailed node repr +``` -### Technical Debt -- Poetry dev-dependencies section is deprecated (migrate to poetry.group.dev.dependencies) -- Consider adding type hints more comprehensively -- Some test files could be consolidated +### Running Single Test with Timeout +```bash +timeout 5 poetry run pytest test/test_file.py::test_name -vv +``` ## Last Updated -2026-03-04 - Initial creation after fixing issue #556 (infinite loop in WITH statement parsing) +2026-03-31 — Rewritten for v3 architecture (sqlglot-based, class extractors) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 00000000..74df9c22 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,569 @@ +# Architecture + +sql-metadata v3 is a Python library that parses SQL queries and extracts metadata (tables, columns, aliases, CTEs, subqueries, etc.). It delegates SQL parsing to [sqlglot](https://github.com/tobymao/sqlglot) for AST construction, then walks the resulting tree with specialised extractors. 
+ +## Module Map + +| Module | Role | Key Class/Function | +|--------|------|--------------------| +| [`parser.py`](sql_metadata/parser.py) | Public facade — composes all extractors via lazy properties | `Parser` | +| [`ast_parser.py`](sql_metadata/ast_parser.py) | Thin orchestrator — composes SqlCleaner + DialectParser, caches AST | `ASTParser` | +| [`sql_cleaner.py`](sql_metadata/sql_cleaner.py) | Raw SQL preprocessing (no sqlglot dependency) | `SqlCleaner`, `CleanResult` | +| [`dialect_parser.py`](sql_metadata/dialect_parser.py) | Dialect detection, sqlglot parsing, parse-quality validation | `DialectParser`, `HashVarDialect`, `BracketedTableDialect` | +| [`column_extractor.py`](sql_metadata/column_extractor.py) | Single-pass DFS column/alias extraction | `ColumnExtractor` | +| [`table_extractor.py`](sql_metadata/table_extractor.py) | Table extraction with position-based sorting | `TableExtractor` | +| [`nested_resolver.py`](sql_metadata/nested_resolver.py) | CTE/subquery name and body extraction, nested column resolution | `NestedResolver` | +| [`query_type_extractor.py`](sql_metadata/query_type_extractor.py) | Query type detection from AST root node | `QueryTypeExtractor` | +| [`comments.py`](sql_metadata/comments.py) | Comment extraction/stripping via tokenizer gaps | `extract_comments`, `strip_comments` | +| [`keywords_lists.py`](sql_metadata/keywords_lists.py) | Keyword sets, `QueryType` and `TokenType` enums | — | +| [`utils.py`](sql_metadata/utils.py) | `UniqueList` (deduplicating list), `flatten_list`, `_make_reverse_cte_map` | — | +| [`generalizator.py`](sql_metadata/generalizator.py) | Query anonymisation for log aggregation | `Generalizator` | + +--- + +## High-Level Pipeline + +```mermaid +flowchart TB + SQL["Raw SQL string"] + + subgraph AST_CONSTRUCTION["ASTParser (ast_parser.py)"] + direction TB + PP["SqlCleaner\n(sql_cleaner.py)"] + DP["DialectParser\n(dialect_parser.py)"] + PP --> DP + end + + SQL --> AST_CONSTRUCTION + AST_CONSTRUCTION --> 
AST["sqlglot AST"] + + subgraph EXTRACTION["Parallel Extractors"] + direction TB + TE["TableExtractor\n(table_extractor.py)"] + CE["ColumnExtractor\n(column_extractor.py)"] + QT["QueryTypeExtractor\n(query_type_extractor.py)"] + end + + AST --> EXTRACTION + + TE --> TA["tables, tables_aliases"] + CE --> COLS["columns, aliases"] + QT --> QTR["query_type"] + + TA --> NR + COLS --> NR + + subgraph RESOLVE["NestedResolver (nested_resolver.py)"] + direction TB + NR["Resolve subquery.column\nreferences"] + NE["Extract CTE/subquery\nnames and bodies"] + end + + RESOLVE --> FINAL["Final metadata\n(cached on Parser)"] + + COM["comments.py"] -.-> AST_CONSTRUCTION + COM -.-> FINAL +``` + +The `Parser` class ([`parser.py`](sql_metadata/parser.py)) is a thin facade that orchestrates these components through lazy cached properties. No extraction work happens until a property like `.columns` or `.tables` is first accessed. + +--- + +## Module Deep Dives + +### Parser — the facade + +**File:** [`parser.py`](sql_metadata/parser.py) | **Class:** `Parser` + +The constructor (`__init__`) stores the raw SQL and initialises ~20 cache fields to `None`. It creates an `ASTParser` instance (lazy — no parsing yet) and defers everything else. 
+ +**Composition:** + +```mermaid +flowchart LR + P["Parser"] + P --> AP["ASTParser\n(self._ast_parser)"] + P --> TE["TableExtractor\n(created per .tables call)"] + P --> CE["ColumnExtractor\n(via extract_all())"] + P --> NR["NestedResolver\n(self._resolver, lazy)"] + P --> QTE["QueryTypeExtractor\n(via extract_query_type())"] +``` + +**Public properties:** + +| Property | Returns | Triggers | +|----------|---------|----------| +| `query` | Preprocessed SQL (normalised quoting) | — | +| `query_type` | `QueryType` enum | `QueryTypeExtractor(ast, raw_query).extract()` | +| `tokens` | `List[str]` of token strings | sqlglot tokenizer | +| `columns` | Column names | AST parse → TableExtractor → `ColumnExtractor.extract()` → NestedResolver | +| `columns_dict` | Columns by clause section | `.columns` | +| `columns_aliases` | `{alias: target_column}` | `.columns` | +| `columns_aliases_names` | List of alias names | `.columns` | +| `columns_aliases_dict` | Aliases by clause section | `.columns` | +| `tables` | Table names | AST parse → TableExtractor | +| `tables_aliases` | `{alias: real_table}` | AST parse → TableExtractor | +| `with_names` | CTE names | AST parse → NestedResolver | +| `with_queries` | `{cte_name: body_sql}` | NestedResolver | +| `subqueries` | `{subquery_name: body_sql}` | NestedResolver | +| `subqueries_names` | Subquery aliases (innermost first) | AST parse → NestedResolver | +| `limit_and_offset` | `(limit, offset)` tuple | AST parse (regex fallback) | +| `values` | Literal values from INSERT | AST parse | +| `values_dict` | `{column: value}` pairs | `.values` + `.columns` | +| `comments` | Comment strings | sqlglot tokenizer | +| `without_comments` | SQL sans comments | sqlglot tokenizer | +| `generalize` | Anonymised SQL | Generalizator | + +**Caching pattern** — every property checks its cache field first: + +```python +@property +def tables(self) -> List[str]: + if self._tables is not None: + return self._tables + # ... compute and cache ... 
+ self._tables = result + return self._tables +``` + +**Regex fallbacks** — when `sqlglot.parse()` fails (raises `ValueError`), the parser falls back to regex extraction for columns (`_extract_columns_regex`) and LIMIT/OFFSET (`_extract_limit_regex`) rather than raising an error. + +--- + +### ASTParser — Orchestrator + +**File:** [`ast_parser.py`](sql_metadata/ast_parser.py) | **Class:** `ASTParser` + +Thin orchestrator that composes `SqlCleaner` and `DialectParser`. Instantiated once per `Parser` — actual parsing is deferred until `.ast` is first accessed. Exposes `.ast`, `.dialect`, `.is_replace`, and `.cte_name_map` properties. + +--- + +### SqlCleaner — Raw SQL Preprocessing + +**File:** [`sql_cleaner.py`](sql_metadata/sql_cleaner.py) | **Class:** `SqlCleaner` + +Pure string transformations with no sqlglot dependency. `SqlCleaner.clean(sql)` returns a `CleanResult` namedtuple with the cleaned SQL, `is_replace` flag, and CTE name map. + +#### Preprocessing pipeline + +```mermaid +flowchart LR + A["1. REPLACE INTO\n→ INSERT INTO"] --> B["2. SELECT...INTO\nvars stripped"] + B --> C["3. Strip\ncomments"] + C --> D["4. Normalise\nqualified CTE names"] + D --> E["5. Strip DB2\nisolation clauses"] + E --> F["6. 
Strip outer\nparentheses"] +``` + +| Step | Why | Example | +|------|-----|---------| +| REPLACE INTO rewrite | sqlglot parses `REPLACE INTO` as opaque `Command` | `REPLACE INTO t` → `INSERT INTO t` (flag set) | +| SELECT...INTO strip | Prevents sqlglot from treating variables as tables | `SELECT x INTO @v FROM t` → `SELECT x FROM t` | +| Comment stripping | Uses `strip_comments_for_parsing()` from `comments.py` | `SELECT /* hi */ 1` → `SELECT 1` | +| CTE name normalisation | sqlglot can't parse `WITH db.name AS (...)` | `db.cte` → `db__DOT__cte` (reverse map stored) | +| DB2 isolation clauses | Removes trailing `WITH UR/CS/RS/RR` | `SELECT 1 WITH UR` → `SELECT 1` | +| Outer paren stripping | sqlglot can't parse `((UPDATE ...))` | `((UPDATE t SET x=1))` → `UPDATE t SET x=1` | + +--- + +### DialectParser — Dialect Detection and Parsing + +**File:** [`dialect_parser.py`](sql_metadata/dialect_parser.py) | **Class:** `DialectParser` + +Combines dialect heuristics, `sqlglot.parse()` calls, and parse-quality validation. `DialectParser().parse(clean_sql)` returns `(ast, dialect)`. 
+ +**Custom dialects (defined in same file):** + +- `HashVarDialect` — treats `#` as part of identifiers for MSSQL temp tables (`#temp`) and template variables (`#VAR#`) +- `BracketedTableDialect` — TSQL subclass for `[bracket]` quoting; also signals `TableExtractor` to preserve brackets in output + +#### Dialect detection + +`_detect_dialects(sql)` inspects the SQL for syntax hints and returns an ordered list of dialects to try: + +```mermaid +flowchart TD + SQL["Cleaned SQL"] + SQL --> H{"#WORD\nvariables?"} + H -->|Yes| HD["[HashVarDialect, None, mysql]"] + H -->|No| BT{"Backticks?"} + BT -->|Yes| MY["[mysql, None]"] + BT -->|No| BR{"Brackets\nor TOP?"} + BR -->|Yes| BD["[BracketedTableDialect, None, mysql]"] + BR -->|No| UN{"UNIQUE?"} + UN -->|Yes| UO["[None, mysql, oracle]"] + UN -->|No| LV{"LATERAL VIEW?"} + LV -->|Yes| SP["[spark, None, mysql]"] + LV -->|No| DF["[None, mysql]"] +``` + +#### Multi-dialect retry + +`_try_dialects` iterates through the dialect list. For each dialect: + +1. Parse with `sqlglot.parse()` (warnings suppressed) +2. Check for degradation via `_is_degraded` — phantom tables (`IGNORE`, `""`), keyword-as-column names (`UNIQUE`, `DISTINCT`) +3. If degraded and not the last dialect, try the next one +4. If all fail, raise `ValueError("This query is wrong")` + +--- + +### ColumnExtractor — columns and aliases + +**File:** [`column_extractor.py`](sql_metadata/column_extractor.py) | **Class:** `ColumnExtractor` + +Performs a single-pass depth-first walk of the AST in `arg_types` key order (which mirrors left-to-right SQL text order). Collects columns and column aliases into a `_Collector` accumulator. Returns an `ExtractionResult` frozen dataclass — consumed directly by `Parser.columns` and friends. 
+ +`Parser` calls `ColumnExtractor` directly (no wrapper functions): + +```python +extractor = ColumnExtractor(ast, table_aliases, cte_name_map) +result = extractor.extract() # returns ExtractionResult +result.columns # UniqueList of column names +result.columns_dict # columns by clause section +result.alias_map # {alias: target_column} +``` + +#### Data flow + +```mermaid +flowchart TB + AST["sqlglot AST"] --> EXT["ColumnExtractor.extract()"] + TA["table_aliases\n(from TableExtractor)"] --> EXT + EXT --> WALK["_walk() — DFS in\narg_types key order"] + WALK --> COLL["_Collector\n(mutable accumulator)"] + COLL --> RES["ExtractionResult\n(frozen dataclass)"] +``` + +#### DFS dispatch + +The walk visits each node and dispatches to specialised handlers: + +| AST Node Type | Handler | What it does | +|---------------|---------|-------------| +| `exp.Star` | `_handle_star` | Adds `*` (skips if inside function like `COUNT(*)`) | +| `exp.ColumnDef` | (inline) | Adds column name for CREATE TABLE DDL | +| `exp.Identifier` | `_handle_identifier` | Adds column if in JOIN USING context | +| `exp.CTE` | `_handle_cte` | Records CTE name, processes column definitions | +| `exp.Column` | `_handle_column` | Main handler — resolves table alias, builds full name | +| `exp.Subquery` (aliased) | (inline) | Records subquery name and depth for ordering | + +**Special processing** in `_process_child_key`: +- SELECT expressions → `_handle_select_exprs` → iterates expressions, detects aliases +- INSERT schema → `_handle_insert_schema` → extracts column list from `INSERT INTO t(col1, col2)` +- JOIN USING → `_handle_join_using` → extracts column identifiers + +#### Clause classification + +`_classify_clause` maps each `arg_types` key to a `columns_dict` section: + +| Key | Section | +|-----|---------| +| `expressions` (under `Select`) | `"select"` | +| `expressions` (under `Update`) | `"update"` | +| `where` | `"where"` | +| `group` | `"group_by"` | +| `order` | `"order_by"` | +| `having` | 
`"having"` | +| `on`, `using` | `"join"` | + +#### Alias handling + +`_handle_alias` processes `SELECT expr AS alias`: + +1. If the aliased expression contains a subquery → walk it recursively, extract its SELECT columns as the alias target +2. If the expression has columns → add them, then register the alias mapping (unless it's a self-alias like `SELECT col AS col`) +3. If no columns (e.g., `SELECT 1 AS num`) → register the alias with no target + +#### Date-part function filtering + +`_is_date_part_unit` prevents extracting unit keywords as columns in functions like `DATEADD(day, 1, col)` — `day` is a keyword, not a column reference. + +--- + +### TableExtractor — tables and table aliases + +**File:** [`table_extractor.py`](sql_metadata/table_extractor.py) | **Class:** `TableExtractor` + +Walks the AST for `exp.Table` and `exp.Lateral` nodes, builds fully-qualified table names, and sorts results by first occurrence in the raw SQL. + +#### Extraction flow + +```mermaid +flowchart TB + AST["sqlglot AST"] --> CHECK{"exp.Command?"} + CHECK -->|Yes| REGEX["Regex fallback\n(_extract_tables_from_command)"] + CHECK -->|No| CREATE{"exp.Create?"} + CREATE -->|Yes| TARGET["Extract CREATE target"] + CREATE -->|No| SKIP["skip"] + TARGET --> COLLECT + SKIP --> COLLECT["_collect_all()\nWalk exp.Table + exp.Lateral"] + COLLECT --> FILTER["Filter out CTE names"] + FILTER --> SORT["Sort by _first_position()\n(regex in raw SQL)"] + SORT --> ORDER["_place_tables_in_order()\nCREATE target goes first"] +``` + +**Key algorithms:** + +- **Name construction** — `_table_full_name` assembles `catalog.db.name`, with special handling for bracket mode (TSQL) and double-dot notation (`catalog..name`) +- **Position sorting** — `_first_position` finds each table name in the raw SQL via regex, preferring matches after table-introducing keywords (`FROM`, `JOIN`, `TABLE`, `INTO`, `UPDATE`). This ensures output order matches left-to-right reading order. 
+- **CTE filtering** — table names matching known CTE names are excluded, so only real tables appear in the output + +**Alias extraction** — `extract_aliases` walks `exp.Table` nodes looking for aliases: + +```sql +SELECT * FROM users u JOIN orders o ON u.id = o.user_id +-- ^ ^ +-- alias="u" alias="o" +-- Result: {"u": "users", "o": "orders"} +``` + +--- + +### NestedResolver — CTE/subquery names, bodies, and resolution + +**File:** [`nested_resolver.py`](sql_metadata/nested_resolver.py) | **Class:** `NestedResolver` + +Handles the complete "look inside nested queries" concern. Created lazily by `Parser._get_resolver()`. + +#### Four responsibilities + +**1. Name extraction** — extract CTE and subquery names from the AST: + +- `extract_cte_names(ast, cte_name_map)` — static method, walks `exp.CTE` nodes and collects their aliases (with reverse CTE name map applied) +- `extract_subquery_names(ast)` — static method, post-order walk collecting aliased `exp.Subquery` names + +Called directly by `Parser.with_names` and `Parser.subqueries_names`. + +**2. Body extraction** — render CTE/subquery AST nodes back to SQL: + +- `extract_cte_bodies` — finds `exp.CTE` nodes in the AST, renders their body via `_PreservingGenerator` +- `extract_subquery_bodies` — post-order walk so inner subqueries appear before outer ones +- `_PreservingGenerator` — custom sqlglot `Generator` that preserves function signatures sqlglot would normalise (e.g., keeps `IFNULL` instead of converting to `COALESCE`, keeps `DIV` instead of `CAST(... / ... AS INT)`) + +**3. 
Column resolution** — `resolve()` runs two phases: + +```mermaid +flowchart TB + INPUT["columns from ColumnExtractor"] + INPUT --> P1["Phase 1: _resolve_sub_queries()\nReplace subquery.column refs\nwith actual columns"] + P1 --> P2["Phase 2: _resolve_bare_through_nested()\nDrop bare names that are\naliases in nested queries"] + P2 --> OUTPUT["Resolved columns"] +``` + +Phase 1 example: +```sql +SELECT sq.name FROM (SELECT name FROM users) sq +-- "sq.name" → resolved through subquery → "name" +``` + +Phase 2 example: +```sql +WITH cte AS (SELECT id, name AS label FROM users) +SELECT label FROM cte +-- "label" is an alias inside the CTE → dropped from columns, added to aliases +``` + +**4. Recursive sub-Parser instantiation** — when resolving `subquery.column`, the resolver creates a new `Parser(body_sql)` for each nested query body (cached in `_subqueries_parsers` / `_with_parsers`). This means the full pipeline runs recursively for each CTE/subquery. + +#### Alias resolution with cycle detection + +`_resolve_column_alias` follows alias chains with a `visited` set to prevent infinite loops: + +```python +# a → b → c (resolves to "c") +# a → b → a (cycle detected, stops at "a") +``` + +--- + +### QueryTypeExtractor + +**File:** [`query_type_extractor.py`](sql_metadata/query_type_extractor.py) | **Class:** `QueryTypeExtractor` + +Maps the AST root node type to a `QueryType` enum value via `_SIMPLE_TYPE_MAP`: + +| AST Node | QueryType | +|----------|-----------| +| `exp.Select`, `exp.Union`, `exp.Intersect`, `exp.Except` | `SELECT` | +| `exp.Insert` | `INSERT` | +| `exp.Update` | `UPDATE` | +| `exp.Delete` | `DELETE` | +| `exp.Create` | `CREATE` | +| `exp.Alter` | `ALTER` | +| `exp.Drop` | `DROP` | +| `exp.TruncateTable` | `TRUNCATE` | +| `exp.Merge` | `MERGE` | + +Special handling: +- Parenthesised queries → `_unwrap_parens` strips `Paren`/`Subquery` wrappers +- `exp.Command` → `_resolve_command_type` checks for `CREATE FUNCTION` / `ALTER` +- `REPLACE INTO` → detected 
via `ASTParser.is_replace` flag, patched in `Parser.query_type` + +--- + +### Comments + +**File:** [`comments.py`](sql_metadata/comments.py) + +A collection of pure stateless functions (no class). Exploits the fact that sqlglot's tokenizer skips comments — comments live in the *gaps* between consecutive token positions. + +**Algorithm:** + +1. Tokenize the SQL with the appropriate tokenizer +2. For each gap between token `[i].end` and token `[i+1].start`, scan for comment delimiters (`--`, `/* */`, `#`) +3. Collect or strip the matches + +**Tokenizer selection** — `_choose_tokenizer`: +- If SQL contains `#` used as a comment (not a variable) → MySQL tokenizer (treats `#` as comment delimiter) +- Otherwise → default sqlglot tokenizer +- `_has_hash_variables` distinguishes `#temp` (MSSQL) and `#VAR#` (template) from `# comment` (MySQL) + +**Two stripping variants:** +- `strip_comments` — public API, preserves `#VAR` references +- `strip_comments_for_parsing` — internal, always strips `#` comments (needed before `sqlglot.parse()`) + +--- + +### Supporting Modules + +**[`keywords_lists.py`](sql_metadata/keywords_lists.py)** — keyword sets used for token classification and query type mapping: +- `KEYWORDS_BEFORE_COLUMNS` — keywords after which columns appear (`SELECT`, `WHERE`, `ON`, etc.) +- `TABLE_ADJUSTMENT_KEYWORDS` — keywords after which tables appear (`FROM`, `JOIN`, `INTO`, etc.) +- `COLUMNS_SECTIONS` — maps keywords to `columns_dict` section names +- `QueryType` — string enum (`str, Enum`) for direct comparison (`parser.query_type == "SELECT"`) + +**[`utils.py`](sql_metadata/utils.py):** +- `UniqueList` — deduplicating list with O(1) membership checks via internal `set`. Used everywhere to collect columns, tables, aliases. +- `flatten_list` — recursively flattens nested lists from multi-column alias resolution. +- `_make_reverse_cte_map` — builds reverse mapping from placeholder CTE names to originals, shared by `ColumnExtractor` and `NestedResolver`. 
+ +**[`generalizator.py`](sql_metadata/generalizator.py)** — anonymises SQL for log aggregation: strips comments, replaces literals with `X`, numbers with `N`, collapses `IN(...)` lists to `(XYZ)`. + +--- + +## Traced Walkthrough + +Let's trace `Parser("SELECT a AS x FROM t").columns_aliases` step by step. + +```mermaid +sequenceDiagram + participant User + participant Parser + participant ASTParser + participant sqlglot + participant TableExtractor + participant ColumnExtractor + participant NestedResolver + + User->>Parser: .columns_aliases + Parser->>Parser: .columns (not cached yet) + + Note over Parser: Need AST and table_aliases + + Parser->>ASTParser: .ast (first access) + ASTParser->>ASTParser: SqlCleaner.clean() + Note over ASTParser: No REPLACE, no comments,
no qualified CTEs + ASTParser->>ASTParser: DialectParser().parse() + Note over ASTParser: No special syntax →
[None, "mysql"] + ASTParser->>sqlglot: sqlglot.parse(sql, dialect=None) + sqlglot-->>ASTParser: exp.Select AST + + Parser->>Parser: .tables_aliases + Parser->>TableExtractor: extract_aliases(tables) + Note over TableExtractor: No aliases on "t" + TableExtractor-->>Parser: {} + + Parser->>ColumnExtractor: ColumnExtractor(ast, {}, {}).extract() + Note over ColumnExtractor: _walk() DFS begins + + Note over ColumnExtractor: Visit Select node →
_walk_children() + Note over ColumnExtractor: key="expressions" + Select →
_handle_select_exprs() + Note over ColumnExtractor: expr[0] is Alias "x" →
_handle_alias() + Note over ColumnExtractor: inner is Column "a" →
_flat_columns() → ["a"]
add_column("a", "select")
add_alias("x", "a", "select") + Note over ColumnExtractor: key="from" →
skip (Table, not Column) + + ColumnExtractor-->>Parser: ExtractionResult (frozen dataclass) + + Note over Parser: result.columns=["a"]
result.alias_map={"x": "a"} + + Parser->>NestedResolver: resolve(columns, ...) + Note over NestedResolver: No subqueries or CTEs
→ columns unchanged + + NestedResolver-->>Parser: (["a"], {...}, {"x": "a"}) + + Parser-->>User: {"x": "a"} +``` + +**What happened:** + +1. **`Parser.__init__`** — stored raw SQL, created `ASTParser` (lazy) +2. **`.columns_aliases`** accessed → triggers `.columns` (not cached) +3. **`.columns`** needs the AST → accesses `self._ast_parser.ast` +4. **`ASTParser.ast`** (first access) → `SqlCleaner.clean()` → `DialectParser().parse()` → `sqlglot.parse()` +5. **`.tables_aliases`** needed for column extraction → `TableExtractor.extract_aliases()` → `{}` (no aliases on `t`) +6. **`ColumnExtractor(ast, {}, {}).extract()`** → DFS walk: + - Visits `Select` node, key `"expressions"` → `_handle_select_exprs()` + - Finds `Alias(Column("a"), "x")` → `_handle_alias()` → records column `"a"` in select section, alias `"x"` → `"a"` + - Key `"from"` → finds `Table("t")`, not a column node, skipped +7. **`NestedResolver.resolve()`** — no subqueries or CTEs, columns pass through unchanged +8. **Result cached** — `_columns = ["a"]`, `_columns_aliases = {"x": "a"}` + +--- + +## Dependency Graph + +```mermaid +flowchart TB + INIT["__init__.py"] + INIT --> P["parser.py"] + + P --> AST["ast_parser.py"] + P --> EXT["column_extractor.py"] + P --> TAB["table_extractor.py"] + P --> RES["nested_resolver.py"] + P --> QT["query_type_extractor.py"] + P --> COM["comments.py"] + P --> GEN["generalizator.py"] + P --> KW["keywords_lists.py"] + P --> UT["utils.py"] + + AST --> SC["sql_cleaner.py"] + AST --> DP["dialect_parser.py"] + + SC --> COM + DP --> COM + DP -.->|"sqlglot.parse()"| SG["sqlglot"] + TAB --> DP + + EXT -.-> SG + EXT --> UT + TAB -.-> SG + RES -.-> SG + RES --> UT + RES -->|"sub-Parser\n(recursive)"| P + QT -.-> SG + QT --> KW + COM -.->|"Tokenizer"| SG + GEN --> COM + + style SG fill:#f0f0f0,stroke:#999 +``` + +Note the circular dependency: `nested_resolver.py` imports `Parser` from `parser.py` to create sub-Parser instances for nested queries. 
This import is deferred (inside method bodies) to avoid import-time cycles. + +--- + +## Key Design Patterns + +**Lazy evaluation with caching** — every `Parser` property computes on first access and caches the result. This means you pay zero cost for properties you never access. + +**Composition over inheritance** — `Parser` doesn't subclass anything meaningful. It composes `ASTParser` (which itself composes `SqlCleaner` and `DialectParser`), `TableExtractor`, `ColumnExtractor`, `NestedResolver`, and `QueryTypeExtractor` as separate concerns. + +**Single-pass DFS extraction** — `ColumnExtractor` walks the AST exactly once in `arg_types` key order. Because sqlglot's `arg_types` keys are ordered to mirror left-to-right SQL text, the walk naturally processes clauses in source order. + +**Multi-dialect retry with degradation detection** — rather than guessing one dialect, `DialectParser` tries several in order and picks the first that doesn't produce a degraded result (phantom tables, keyword-as-column names). + +**Graceful regex fallbacks** — when the AST parse fails entirely, the parser degrades to regex-based extraction for columns (INSERT INTO pattern) and LIMIT/OFFSET rather than raising an error. + +**Recursive sub-parsing** — `NestedResolver` creates fresh `Parser` instances for CTE/subquery bodies. This reuses the entire pipeline recursively, with caching to avoid re-parsing the same body twice. diff --git a/CLAUDE.md b/CLAUDE.md index 43c994c2..2ace935b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1 +1,5 @@ @AGENTS.md + +## Rules + +- **Never change test files to match incorrect code output.** Tests define the expected behavior. If a test fails, fix the source code, not the test. The only exception is when a feature is explicitly removed (like `get_query_tokens` in the v3 migration). 
diff --git a/Makefile b/Makefile index 226c0adf..b686ed9c 100644 --- a/Makefile +++ b/Makefile @@ -8,11 +8,13 @@ coverage: poetry run pytest -vv --cov=sql_metadata --cov-report=term --cov-report=html lint: - poetry run flake8 sql_metadata - poetry run pylint sql_metadata + poetry run ruff check --fix sql_metadata format: - poetry run black . + poetry run ruff format . + +type_check: + poetry run mypy sql_metadata publish: # run git tag -a v0.0.0 before running make publish diff --git a/README.md b/README.md index 4cd34512..95c67976 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![PyPI](https://img.shields.io/pypi/v/sql_metadata.svg)](https://pypi.python.org/pypi/sql_metadata) [![Tests](https://github.com/macbre/sql-metadata/actions/workflows/python-ci.yml/badge.svg)](https://github.com/macbre/sql-metadata/actions/workflows/python-ci.yml) [![Coverage Status](https://coveralls.io/repos/github/macbre/sql-metadata/badge.svg?branch=master&1)](https://coveralls.io/github/macbre/sql-metadata?branch=master) -Code style: black +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![Maintenance](https://img.shields.io/badge/maintained%3F-yes-green.svg)](https://github.com/macbre/sql-metadata/graphs/commit-activity) [![Downloads](https://pepy.tech/badge/sql-metadata/month)](https://pepy.tech/project/sql-metadata) diff --git a/poetry.lock b/poetry.lock index 6631f283..e807a295 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,87 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. - -[[package]] -name = "astroid" -version = "4.0.4" -description = "An abstract syntax tree for Python with inference support." 
-optional = false -python-versions = ">=3.10.0" -groups = ["dev"] -files = [ - {file = "astroid-4.0.4-py3-none-any.whl", hash = "sha256:52f39653876c7dec3e3afd4c2696920e05c83832b9737afc21928f2d2eb7a753"}, - {file = "astroid-4.0.4.tar.gz", hash = "sha256:986fed8bcf79fb82c78b18a53352a0b287a73817d6dbcfba3162da36667c49a0"}, -] - -[package.dependencies] -typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} - -[[package]] -name = "black" -version = "26.3.1" -description = "The uncompromising code formatter." -optional = false -python-versions = ">=3.10" -groups = ["dev"] -files = [ - {file = "black-26.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:86a8b5035fce64f5dcd1b794cf8ec4d31fe458cf6ce3986a30deb434df82a1d2"}, - {file = "black-26.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5602bdb96d52d2d0672f24f6ffe5218795736dd34807fd0fd55ccd6bf206168b"}, - {file = "black-26.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac"}, - {file = "black-26.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:6e131579c243c98f35bce64a7e08e87fb2d610544754675d4a0e73a070a5aa3a"}, - {file = "black-26.3.1-cp310-cp310-win_arm64.whl", hash = "sha256:5ed0ca58586c8d9a487352a96b15272b7fa55d139fc8496b519e78023a8dab0a"}, - {file = "black-26.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:28ef38aee69e4b12fda8dba75e21f9b4f979b490c8ac0baa7cb505369ac9e1ff"}, - {file = "black-26.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bf9bf162ed91a26f1adba8efda0b573bc6924ec1408a52cc6f82cb73ec2b142c"}, - {file = "black-26.3.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:474c27574d6d7037c1bc875a81d9be0a9a4f9ee95e62800dab3cfaadbf75acd5"}, - {file = "black-26.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5e9d0d86df21f2e1677cc4bd090cd0e446278bcbbe49bf3659c308c3e402843e"}, - {file = 
"black-26.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:9a5e9f45e5d5e1c5b5c29b3bd4265dcc90e8b92cf4534520896ed77f791f4da5"}, - {file = "black-26.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1"}, - {file = "black-26.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f"}, - {file = "black-26.3.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7"}, - {file = "black-26.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983"}, - {file = "black-26.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb"}, - {file = "black-26.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54"}, - {file = "black-26.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f"}, - {file = "black-26.3.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56"}, - {file = "black-26.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839"}, - {file = "black-26.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2"}, - {file = "black-26.3.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2d6bfaf7fd0993b420bed691f20f9492d53ce9a2bcccea4b797d34e947318a78"}, - {file = "black-26.3.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f89f2ab047c76a9c03f78d0d66ca519e389519902fa27e7a91117ef7611c0568"}, - {file = 
"black-26.3.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b07fc0dab849d24a80a29cfab8d8a19187d1c4685d8a5e6385a5ce323c1f015f"}, - {file = "black-26.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:0126ae5b7c09957da2bdbd91a9ba1207453feada9e9fe51992848658c6c8e01c"}, - {file = "black-26.3.1-cp314-cp314-win_arm64.whl", hash = "sha256:92c0ec1f2cc149551a2b7b47efc32c866406b6891b0ee4625e95967c8f4acfb1"}, - {file = "black-26.3.1-py3-none-any.whl", hash = "sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b"}, - {file = "black-26.3.1.tar.gz", hash = "sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=1.0.0" -platformdirs = ">=2" -pytokens = ">=0.4.0,<0.5.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.10)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2) ; sys_platform != \"win32\"", "winloop (>=0.5.0) ; sys_platform == \"win32\""] - -[[package]] -name = "click" -version = "8.1.8" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, - {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} +# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. [[package]] name = "colorama" @@ -90,7 +7,7 @@ description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["dev"] -markers = "platform_system == \"Windows\" or sys_platform == \"win32\"" +markers = "sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -218,22 +135,6 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli ; python_full_version <= \"3.11.0a6\""] -[[package]] -name = "dill" -version = "0.4.0" -description = "serialize all of Python" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049"}, - {file = "dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0"}, -] - -[package.extras] -graph = ["objgraph (>=1.7.2)"] -profile = ["gprof2dot (>=2022.7.29)"] - [[package]] name = "exceptiongroup" version = "1.2.2" @@ -250,23 +151,6 @@ files = [ [package.extras] test = ["pytest (>=6)"] -[[package]] -name = "flake8" -version = "7.3.0" -description = "the modular source code checker: pep8 pyflakes and co" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "flake8-7.3.0-py2.py3-none-any.whl", hash = "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e"}, - {file = "flake8-7.3.0.tar.gz", hash = "sha256:fe044858146b9fc69b551a4b490d69cf960fcb78ad1edcb84e7fbb1b4a8e3872"}, -] - -[package.dependencies] -mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.14.0,<2.15.0" -pyflakes = ">=3.4.0,<3.5.0" - [[package]] name = "iniconfig" version = "2.1.0" @@ -280,33 +164,168 @@ files = [ ] [[package]] -name = "isort" -version = "6.0.1" -description = "A Python 
utility / library to sort Python imports." +name = "librt" +version = "0.8.1" +description = "Mypyc runtime library" optional = false -python-versions = ">=3.9.0" +python-versions = ">=3.9" groups = ["dev"] +markers = "platform_python_implementation != \"PyPy\"" files = [ - {file = "isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615"}, - {file = "isort-6.0.1.tar.gz", hash = "sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450"}, + {file = "librt-0.8.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:81fd938344fecb9373ba1b155968c8a329491d2ce38e7ddb76f30ffb938f12dc"}, + {file = "librt-0.8.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5db05697c82b3a2ec53f6e72b2ed373132b0c2e05135f0696784e97d7f5d48e7"}, + {file = "librt-0.8.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d56bc4011975f7460bea7b33e1ff425d2f1adf419935ff6707273c77f8a4ada6"}, + {file = "librt-0.8.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cdc0f588ff4b663ea96c26d2a230c525c6fc62b28314edaaaca8ed5af931ad0"}, + {file = "librt-0.8.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:97c2b54ff6717a7a563b72627990bec60d8029df17df423f0ed37d56a17a176b"}, + {file = "librt-0.8.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8f1125e6bbf2f1657d9a2f3ccc4a2c9b0c8b176965bb565dd4d86be67eddb4b6"}, + {file = "librt-0.8.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8f4bb453f408137d7581be309b2fbc6868a80e7ef60c88e689078ee3a296ae71"}, + {file = "librt-0.8.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c336d61d2fe74a3195edc1646d53ff1cddd3a9600b09fa6ab75e5514ba4862a7"}, + {file = "librt-0.8.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:eb5656019db7c4deacf0c1a55a898c5bb8f989be904597fcb5232a2f4828fa05"}, + {file = 
"librt-0.8.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c25d9e338d5bed46c1632f851babf3d13c78f49a225462017cf5e11e845c5891"}, + {file = "librt-0.8.1-cp310-cp310-win32.whl", hash = "sha256:aaab0e307e344cb28d800957ef3ec16605146ef0e59e059a60a176d19543d1b7"}, + {file = "librt-0.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:56e04c14b696300d47b3bc5f1d10a00e86ae978886d0cee14e5714fafb5df5d2"}, + {file = "librt-0.8.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:681dc2451d6d846794a828c16c22dc452d924e9f700a485b7ecb887a30aad1fd"}, + {file = "librt-0.8.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3b4350b13cc0e6f5bec8fa7caf29a8fb8cdc051a3bae45cfbfd7ce64f009965"}, + {file = "librt-0.8.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ac1e7817fd0ed3d14fd7c5df91daed84c48e4c2a11ee99c0547f9f62fdae13da"}, + {file = "librt-0.8.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:747328be0c5b7075cde86a0e09d7a9196029800ba75a1689332348e998fb85c0"}, + {file = "librt-0.8.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f0af2bd2bc204fa27f3d6711d0f360e6b8c684a035206257a81673ab924aa11e"}, + {file = "librt-0.8.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d480de377f5b687b6b1bc0c0407426da556e2a757633cc7e4d2e1a057aa688f3"}, + {file = "librt-0.8.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d0ee06b5b5291f609ddb37b9750985b27bc567791bc87c76a569b3feed8481ac"}, + {file = "librt-0.8.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9e2c6f77b9ad48ce5603b83b7da9ee3e36b3ab425353f695cba13200c5d96596"}, + {file = "librt-0.8.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:439352ba9373f11cb8e1933da194dcc6206daf779ff8df0ed69c5e39113e6a99"}, + {file = "librt-0.8.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:82210adabbc331dbb65d7868b105185464ef13f56f7f76688565ad79f648b0fe"}, + 
{file = "librt-0.8.1-cp311-cp311-win32.whl", hash = "sha256:52c224e14614b750c0a6d97368e16804a98c684657c7518752c356834fff83bb"}, + {file = "librt-0.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:c00e5c884f528c9932d278d5c9cbbea38a6b81eb62c02e06ae53751a83a4d52b"}, + {file = "librt-0.8.1-cp311-cp311-win_arm64.whl", hash = "sha256:f7cdf7f26c2286ffb02e46d7bac56c94655540b26347673bea15fa52a6af17e9"}, + {file = "librt-0.8.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a28f2612ab566b17f3698b0da021ff9960610301607c9a5e8eaca62f5e1c350a"}, + {file = "librt-0.8.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:60a78b694c9aee2a0f1aaeaa7d101cf713e92e8423a941d2897f4fa37908dab9"}, + {file = "librt-0.8.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:758509ea3f1eba2a57558e7e98f4659d0ea7670bff49673b0dde18a3c7e6c0eb"}, + {file = "librt-0.8.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:039b9f2c506bd0ab0f8725aa5ba339c6f0cd19d3b514b50d134789809c24285d"}, + {file = "librt-0.8.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bb54f1205a3a6ab41a6fd71dfcdcbd278670d3a90ca502a30d9da583105b6f7"}, + {file = "librt-0.8.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:05bd41cdee35b0c59c259f870f6da532a2c5ca57db95b5f23689fcb5c9e42440"}, + {file = "librt-0.8.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adfab487facf03f0d0857b8710cf82d0704a309d8ffc33b03d9302b4c64e91a9"}, + {file = "librt-0.8.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:153188fe98a72f206042be10a2c6026139852805215ed9539186312d50a8e972"}, + {file = "librt-0.8.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:dd3c41254ee98604b08bd5b3af5bf0a89740d4ee0711de95b65166bf44091921"}, + {file = "librt-0.8.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e0d138c7ae532908cbb342162b2611dbd4d90c941cd25ab82084aaf71d2c0bd0"}, + {file 
= "librt-0.8.1-cp312-cp312-win32.whl", hash = "sha256:43353b943613c5d9c49a25aaffdba46f888ec354e71e3529a00cca3f04d66a7a"}, + {file = "librt-0.8.1-cp312-cp312-win_amd64.whl", hash = "sha256:ff8baf1f8d3f4b6b7257fcb75a501f2a5499d0dda57645baa09d4d0d34b19444"}, + {file = "librt-0.8.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f2ae3725904f7377e11cc37722d5d401e8b3d5851fb9273d7f4fe04f6b3d37d"}, + {file = "librt-0.8.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7e6bad1cd94f6764e1e21950542f818a09316645337fd5ab9a7acc45d99a8f35"}, + {file = "librt-0.8.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cf450f498c30af55551ba4f66b9123b7185362ec8b625a773b3d39aa1a717583"}, + {file = "librt-0.8.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:eca45e982fa074090057132e30585a7e8674e9e885d402eae85633e9f449ce6c"}, + {file = "librt-0.8.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c3811485fccfda840861905b8c70bba5ec094e02825598bb9d4ca3936857a04"}, + {file = "librt-0.8.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e4af413908f77294605e28cfd98063f54b2c790561383971d2f52d113d9c363"}, + {file = "librt-0.8.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5212a5bd7fae98dae95710032902edcd2ec4dc994e883294f75c857b83f9aba0"}, + {file = "librt-0.8.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e692aa2d1d604e6ca12d35e51fdc36f4cda6345e28e36374579f7ef3611b3012"}, + {file = "librt-0.8.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4be2a5c926b9770c9e08e717f05737a269b9d0ebc5d2f0060f0fe3fe9ce47acb"}, + {file = "librt-0.8.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fd1a720332ea335ceb544cf0a03f81df92abd4bb887679fd1e460976b0e6214b"}, + {file = "librt-0.8.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2af9e01e0ef80d95ae3c720be101227edae5f2fe7e3dc63d8857fadfc5a1d"}, + {file = 
"librt-0.8.1-cp313-cp313-win32.whl", hash = "sha256:086a32dbb71336627e78cc1d6ee305a68d038ef7d4c39aaff41ae8c9aa46e91a"}, + {file = "librt-0.8.1-cp313-cp313-win_amd64.whl", hash = "sha256:e11769a1dbda4da7b00a76cfffa67aa47cfa66921d2724539eee4b9ede780b79"}, + {file = "librt-0.8.1-cp313-cp313-win_arm64.whl", hash = "sha256:924817ab3141aca17893386ee13261f1d100d1ef410d70afe4389f2359fea4f0"}, + {file = "librt-0.8.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6cfa7fe54fd4d1f47130017351a959fe5804bda7a0bc7e07a2cdbc3fdd28d34f"}, + {file = "librt-0.8.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:228c2409c079f8c11fb2e5d7b277077f694cb93443eb760e00b3b83cb8b3176c"}, + {file = "librt-0.8.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7aae78ab5e3206181780e56912d1b9bb9f90a7249ce12f0e8bf531d0462dd0fc"}, + {file = "librt-0.8.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:172d57ec04346b047ca6af181e1ea4858086c80bdf455f61994c4aa6fc3f866c"}, + {file = "librt-0.8.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6b1977c4ea97ce5eb7755a78fae68d87e4102e4aaf54985e8b56806849cc06a3"}, + {file = "librt-0.8.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:10c42e1f6fd06733ef65ae7bebce2872bcafd8d6e6b0a08fe0a05a23b044fb14"}, + {file = "librt-0.8.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4c8dfa264b9193c4ee19113c985c95f876fae5e51f731494fc4e0cf594990ba7"}, + {file = "librt-0.8.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:01170b6729a438f0dedc4a26ed342e3dc4f02d1000b4b19f980e1877f0c297e6"}, + {file = "librt-0.8.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:7b02679a0d783bdae30d443025b94465d8c3dc512f32f5b5031f93f57ac32071"}, + {file = "librt-0.8.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:190b109bb69592a3401fe1ffdea41a2e73370ace2ffdc4a0e8e2b39cdea81b78"}, + {file = 
"librt-0.8.1-cp314-cp314-win32.whl", hash = "sha256:e70a57ecf89a0f64c24e37f38d3fe217a58169d2fe6ed6d70554964042474023"}, + {file = "librt-0.8.1-cp314-cp314-win_amd64.whl", hash = "sha256:7e2f3edca35664499fbb36e4770650c4bd4a08abc1f4458eab9df4ec56389730"}, + {file = "librt-0.8.1-cp314-cp314-win_arm64.whl", hash = "sha256:0d2f82168e55ddefd27c01c654ce52379c0750ddc31ee86b4b266bcf4d65f2a3"}, + {file = "librt-0.8.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c74a2da57a094bd48d03fa5d196da83d2815678385d2978657499063709abe1"}, + {file = "librt-0.8.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a355d99c4c0d8e5b770313b8b247411ed40949ca44e33e46a4789b9293a907ee"}, + {file = "librt-0.8.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:2eb345e8b33fb748227409c9f1233d4df354d6e54091f0e8fc53acdb2ffedeb7"}, + {file = "librt-0.8.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9be2f15e53ce4e83cc08adc29b26fb5978db62ef2a366fbdf716c8a6c8901040"}, + {file = "librt-0.8.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:785ae29c1f5c6e7c2cde2c7c0e148147f4503da3abc5d44d482068da5322fd9e"}, + {file = "librt-0.8.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1d3a7da44baf692f0c6aeb5b2a09c5e6fc7a703bca9ffa337ddd2e2da53f7732"}, + {file = "librt-0.8.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5fc48998000cbc39ec0d5311312dda93ecf92b39aaf184c5e817d5d440b29624"}, + {file = "librt-0.8.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:e96baa6820280077a78244b2e06e416480ed859bbd8e5d641cf5742919d8beb4"}, + {file = "librt-0.8.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:31362dbfe297b23590530007062c32c6f6176f6099646bb2c95ab1b00a57c382"}, + {file = "librt-0.8.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cc3656283d11540ab0ea01978378e73e10002145117055e03722417aeab30994"}, + 
{file = "librt-0.8.1-cp314-cp314t-win32.whl", hash = "sha256:738f08021b3142c2918c03692608baed43bc51144c29e35807682f8070ee2a3a"}, + {file = "librt-0.8.1-cp314-cp314t-win_amd64.whl", hash = "sha256:89815a22daf9c51884fb5dbe4f1ef65ee6a146e0b6a8df05f753e2e4a9359bf4"}, + {file = "librt-0.8.1-cp314-cp314t-win_arm64.whl", hash = "sha256:bf512a71a23504ed08103a13c941f763db13fb11177beb3d9244c98c29fb4a61"}, + {file = "librt-0.8.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3dff3d3ca8db20e783b1bc7de49c0a2ab0b8387f31236d6a026597d07fcd68ac"}, + {file = "librt-0.8.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:08eec3a1fc435f0d09c87b6bf1ec798986a3544f446b864e4099633a56fcd9ed"}, + {file = "librt-0.8.1-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e3f0a41487fd5fad7e760b9e8a90e251e27c2816fbc2cff36a22a0e6bcbbd9dd"}, + {file = "librt-0.8.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bacdb58d9939d95cc557b4dbaa86527c9db2ac1ed76a18bc8d26f6dc8647d851"}, + {file = "librt-0.8.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6d7ab1f01aa753188605b09a51faa44a3327400b00b8cce424c71910fc0a128"}, + {file = "librt-0.8.1-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4998009e7cb9e896569f4be7004f09d0ed70d386fa99d42b6d363f6d200501ac"}, + {file = "librt-0.8.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2cc68eeeef5e906839c7bb0815748b5b0a974ec27125beefc0f942715785b551"}, + {file = "librt-0.8.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:0bf69d79a23f4f40b8673a947a234baeeb133b5078b483b7297c5916539cf5d5"}, + {file = "librt-0.8.1-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:22b46eabd76c1986ee7d231b0765ad387d7673bbd996aa0d0d054b38ac65d8f6"}, + {file = "librt-0.8.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:237796479f4d0637d6b9cbcb926ff424a97735e68ade6facf402df4ec93375ed"}, + {file = 
"librt-0.8.1-cp39-cp39-win32.whl", hash = "sha256:4beb04b8c66c6ae62f8c1e0b2f097c1ebad9295c929a8d5286c05eae7c2fc7dc"}, + {file = "librt-0.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:64548cde61b692dc0dc379f4b5f59a2f582c2ebe7890d09c1ae3b9e66fa015b7"}, + {file = "librt-0.8.1.tar.gz", hash = "sha256:be46a14693955b3bd96014ccbdb8339ee8c9346fbe11c1b78901b55125f14c73"}, ] -[package.extras] -colors = ["colorama"] -plugins = ["setuptools"] - [[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" +name = "mypy" +version = "1.19.1" +description = "Optional static typing for Python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, + {file = "mypy-1.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f05aa3d375b385734388e844bc01733bd33c644ab48e9684faa54e5389775ec"}, + {file = "mypy-1.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:022ea7279374af1a5d78dfcab853fe6a536eebfda4b59deab53cd21f6cd9f00b"}, + {file = "mypy-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee4c11e460685c3e0c64a4c5de82ae143622410950d6be863303a1c4ba0e36d6"}, + {file = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74"}, + {file = "mypy-1.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ab43590f9cd5108f41aacf9fca31841142c786827a74ab7cc8a2eacb634e09a1"}, + {file = "mypy-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:2899753e2f61e571b3971747e302d5f420c3fd09650e1951e99f823bc3089dac"}, + {file = "mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288"}, + {file = "mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab"}, + {file = "mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6"}, + {file = "mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331"}, + {file = "mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925"}, + {file = "mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042"}, + {file = "mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1"}, + {file = "mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e"}, + {file = "mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2"}, + {file = "mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8"}, + {file = "mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a"}, + {file = "mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13"}, + {file = "mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250"}, + {file = 
"mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b"}, + {file = "mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e"}, + {file = "mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef"}, + {file = "mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75"}, + {file = "mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd"}, + {file = "mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1"}, + {file = "mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718"}, + {file = "mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b"}, + {file = "mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045"}, + {file = "mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957"}, + {file = "mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f"}, + {file = "mypy-1.19.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7bcfc336a03a1aaa26dfce9fff3e287a3ba99872a157561cbfcebe67c13308e3"}, + {file = "mypy-1.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:b7951a701c07ea584c4fe327834b92a30825514c868b1f69c30445093fdd9d5a"}, + {file = "mypy-1.19.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b13cfdd6c87fc3efb69ea4ec18ef79c74c3f98b4e5498ca9b85ab3b2c2329a67"}, + {file = "mypy-1.19.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f28f99c824ecebcdaa2e55d82953e38ff60ee5ec938476796636b86afa3956e"}, + {file = "mypy-1.19.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c608937067d2fc5a4dd1a5ce92fd9e1398691b8c5d012d66e1ddd430e9244376"}, + {file = "mypy-1.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:409088884802d511ee52ca067707b90c883426bd95514e8cfda8281dc2effe24"}, + {file = "mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247"}, + {file = "mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba"}, ] +[package.dependencies] +librt = {version = ">=0.6.2", markers = "platform_python_implementation != \"PyPy\""} +mypy_extensions = ">=1.0.0" +pathspec = ">=0.9.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing_extensions = ">=4.6.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +faster-cache = ["orjson"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -349,23 +368,6 @@ optional = ["typing-extensions (>=4)"] re2 = ["google-re2 (>=1.1)"] tests = ["pytest (>=9)", "typing-extensions (>=4.15)"] -[[package]] -name = "platformdirs" -version = "4.3.7" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
-optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94"}, - {file = "platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"}, -] - -[package.extras] -docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] -type = ["mypy (>=1.14.1)"] - [[package]] name = "pluggy" version = "1.5.0" @@ -382,30 +384,6 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] -[[package]] -name = "pycodestyle" -version = "2.14.0" -description = "Python style guide checker" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d"}, - {file = "pycodestyle-2.14.0.tar.gz", hash = "sha256:c4b5b517d278089ff9d0abdec919cd97262a3367449ea1c8b49b91529167b783"}, -] - -[[package]] -name = "pyflakes" -version = "3.4.0" -description = "passive checker of Python programs" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f"}, - {file = "pyflakes-3.4.0.tar.gz", hash = "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58"}, -] - [[package]] name = "pygments" version = "2.19.1" @@ -421,36 +399,6 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] -[[package]] -name = "pylint" -version = "4.0.5" -description = "python code static checker" -optional = false -python-versions = ">=3.10.0" -groups = ["dev"] -files = [ - {file = "pylint-4.0.5-py3-none-any.whl", hash = 
"sha256:00f51c9b14a3b3ae08cff6b2cdd43f28165c78b165b628692e428fb1f8dc2cf2"}, - {file = "pylint-4.0.5.tar.gz", hash = "sha256:8cd6a618df75deb013bd7eb98327a95f02a6fb839205a6bbf5456ef96afb317c"}, -] - -[package.dependencies] -astroid = ">=4.0.2,<=4.1.dev0" -colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -dill = [ - {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, - {version = ">=0.3.6", markers = "python_version == \"3.11\""}, -] -isort = ">=5,<5.13 || >5.13,<9" -mccabe = ">=0.6,<0.8" -platformdirs = ">=2.2" -tomli = {version = ">=1.1", markers = "python_version < \"3.11\""} -tomlkit = ">=0.10.1" - -[package.extras] -spelling = ["pyenchant (>=3.2,<4.0)"] -testutils = ["gitpython (>3)"] - [[package]] name = "pytest" version = "9.0.2" @@ -496,59 +444,49 @@ pytest = ">=7" testing = ["process-tests", "pytest-xdist", "virtualenv"] [[package]] -name = "pytokens" -version = "0.4.1" -description = "A Fast, spec compliant Python 3.14+ tokenizer that runs on older Pythons." +name = "ruff" +version = "0.11.13" +description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.7" groups = ["dev"] files = [ - {file = "pytokens-0.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a44ed93ea23415c54f3face3b65ef2b844d96aeb3455b8a69b3df6beab6acc5"}, - {file = "pytokens-0.4.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:add8bf86b71a5d9fb5b89f023a80b791e04fba57960aa790cc6125f7f1d39dfe"}, - {file = "pytokens-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c"}, - {file = "pytokens-0.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4e691d7f5186bd2842c14813f79f8884bb03f5995f0575272009982c5ac6c0f7"}, - {file = "pytokens-0.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:27b83ad28825978742beef057bfe406ad6ed524b2d28c252c5de7b4a6dd48fa2"}, - {file = "pytokens-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440"}, - {file = "pytokens-0.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc"}, - {file = "pytokens-0.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d"}, - {file = "pytokens-0.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16"}, - {file = "pytokens-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6"}, - {file = "pytokens-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083"}, - {file = "pytokens-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1"}, - {file = "pytokens-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1"}, - {file = "pytokens-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9"}, - {file = "pytokens-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68"}, - {file = "pytokens-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b"}, - {file = "pytokens-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f"}, - {file = "pytokens-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1"}, - {file = "pytokens-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4"}, - {file = "pytokens-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78"}, - {file = "pytokens-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321"}, - {file = "pytokens-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa"}, - {file = "pytokens-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d"}, - {file = "pytokens-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324"}, - {file = "pytokens-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9"}, - {file = "pytokens-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb"}, - {file = "pytokens-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3"}, - {file = "pytokens-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975"}, - {file = "pytokens-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a"}, - {file = "pytokens-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918"}, - {file = "pytokens-0.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:da5baeaf7116dced9c6bb76dc31ba04a2dc3695f3d9f74741d7910122b456edc"}, - {file = "pytokens-0.4.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11edda0942da80ff58c4408407616a310adecae1ddd22eef8c692fe266fa5009"}, - {file = "pytokens-0.4.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1"}, - {file = "pytokens-0.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dcafc12c30dbaf1e2af0490978352e0c4041a7cde31f4f81435c2a5e8b9cabb6"}, - {file = "pytokens-0.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:42f144f3aafa5d92bad964d471a581651e28b24434d184871bd02e3a0d956037"}, - {file = "pytokens-0.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:34bcc734bd2f2d5fe3b34e7b3c0116bfb2397f2d9666139988e7a3eb5f7400e3"}, - {file = 
"pytokens-0.4.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941d4343bf27b605e9213b26bfa1c4bf197c9c599a9627eb7305b0defcfe40c1"}, - {file = "pytokens-0.4.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3ad72b851e781478366288743198101e5eb34a414f1d5627cdd585ca3b25f1db"}, - {file = "pytokens-0.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:682fa37ff4d8e95f7df6fe6fe6a431e8ed8e788023c6bcc0f0880a12eab80ad1"}, - {file = "pytokens-0.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:30f51edd9bb7f85c748979384165601d028b84f7bd13fe14d3e065304093916a"}, - {file = "pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de"}, - {file = "pytokens-0.4.1.tar.gz", hash = "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a"}, + {file = "ruff-0.11.13-py3-none-linux_armv6l.whl", hash = "sha256:4bdfbf1240533f40042ec00c9e09a3aade6f8c10b6414cf11b519488d2635d46"}, + {file = "ruff-0.11.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aef9c9ed1b5ca28bb15c7eac83b8670cf3b20b478195bd49c8d756ba0a36cf48"}, + {file = "ruff-0.11.13-py3-none-macosx_11_0_arm64.whl", hash = "sha256:53b15a9dfdce029c842e9a5aebc3855e9ab7771395979ff85b7c1dedb53ddc2b"}, + {file = "ruff-0.11.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab153241400789138d13f362c43f7edecc0edfffce2afa6a68434000ecd8f69a"}, + {file = "ruff-0.11.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c51f93029d54a910d3d24f7dd0bb909e31b6cd989a5e4ac513f4eb41629f0dc"}, + {file = "ruff-0.11.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1808b3ed53e1a777c2ef733aca9051dc9bf7c99b26ece15cb59a0320fbdbd629"}, + {file = "ruff-0.11.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d28ce58b5ecf0f43c1b71edffabe6ed7f245d5336b17805803312ec9bc665933"}, + {file = 
"ruff-0.11.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55e4bc3a77842da33c16d55b32c6cac1ec5fb0fbec9c8c513bdce76c4f922165"}, + {file = "ruff-0.11.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:633bf2c6f35678c56ec73189ba6fa19ff1c5e4807a78bf60ef487b9dd272cc71"}, + {file = "ruff-0.11.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ffbc82d70424b275b089166310448051afdc6e914fdab90e08df66c43bb5ca9"}, + {file = "ruff-0.11.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4a9ddd3ec62a9a89578c85842b836e4ac832d4a2e0bfaad3b02243f930ceafcc"}, + {file = "ruff-0.11.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d237a496e0778d719efb05058c64d28b757c77824e04ffe8796c7436e26712b7"}, + {file = "ruff-0.11.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:26816a218ca6ef02142343fd24c70f7cd8c5aa6c203bca284407adf675984432"}, + {file = "ruff-0.11.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:51c3f95abd9331dc5b87c47ac7f376db5616041173826dfd556cfe3d4977f492"}, + {file = "ruff-0.11.13-py3-none-win32.whl", hash = "sha256:96c27935418e4e8e77a26bb05962817f28b8ef3843a6c6cc49d8783b5507f250"}, + {file = "ruff-0.11.13-py3-none-win_amd64.whl", hash = "sha256:29c3189895a8a6a657b7af4e97d330c8a3afd2c9c8f46c81e2fc5a31866517e3"}, + {file = "ruff-0.11.13-py3-none-win_arm64.whl", hash = "sha256:b4385285e9179d608ff1d2fb9922062663c658605819a6876d8beef0c30b7f3b"}, + {file = "ruff-0.11.13.tar.gz", hash = "sha256:26fa247dc68d1d4e72c179e08889a25ac0c7ba4d78aecfc835d49cbfd60bf514"}, +] + +[[package]] +name = "sqlglot" +version = "30.0.3" +description = "An easily customizable SQL parser and transpiler" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "sqlglot-30.0.3-py3-none-any.whl", hash = "sha256:5489cc98b5666f1fafc21e0304ca286e513e142aa054ee5760806a2139d07a05"}, + {file = "sqlglot-30.0.3.tar.gz", hash = 
"sha256:35ba7514c132b54f87fd1732a65a73615efa9fd83f6e1eed0a315bc9ee3e1027"}, ] [package.extras] -dev = ["black", "build", "mypy", "pytest", "pytest-cov", "setuptools", "tox", "twine", "wheel"] +c = ["sqlglotc (==30.0.3)"] +dev = ["duckdb (>=0.6)", "pandas", "pandas-stubs", "pdoc", "pre-commit", "pyperf", "python-dateutil", "pytz", "ruff (==0.15.6)", "setuptools_scm", "sqlglot-mypy (>=1.19.1.post1)", "types-python-dateutil", "types-pytz", "typing_extensions"] +rs = ["sqlglotc (==30.0.3)", "sqlglotrs (==0.13.0)"] [[package]] name = "sqlparse" @@ -573,7 +511,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["dev"] -markers = "python_version == \"3.10\"" +markers = "python_full_version <= \"3.11.0a6\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -609,32 +547,19 @@ files = [ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] -[[package]] -name = "tomlkit" -version = "0.13.2" -description = "Style preserving TOML library" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, - {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, -] - [[package]] name = "typing-extensions" -version = "4.13.2" -description = "Backported and Experimental Type Hints for Python 3.8+" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["dev"] -markers = "python_version == \"3.10\"" files = [ - {file = 
"typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, - {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "a44741e2c45e6702fb176a07d1bacb6b4f3e887d907bb2d8c1439785edded9c3" +content-hash = "bf0ac67ffa320d1ed6a0f60a19f6a0243d54233d3c754ef5fbb3b3fd47a1ff03" diff --git a/pyproject.toml b/pyproject.toml index f402d82c..827a00e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,19 +15,38 @@ packages = [ [tool.poetry.dependencies] python = "^3.10" sqlparse = ">=0.4.1,<0.6.0" +sqlglot = "^30.0.3" -[tool.poetry.dev-dependencies] -black = "^26.3" +[tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.13"} -pylint = "^4.0.5" pytest = "^9.0.2" pytest-cov = "^7.1.0" -flake8 = "^7.3.0" +ruff = "^0.11" +mypy = "^1.19" [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" +[tool.ruff] +line-length = 88 +target-version = "py310" + +[tool.ruff.lint] +select = ["E", "F", "W", "C90", "I"] + +[tool.ruff.lint.mccabe] +max-complexity = 8 + +[tool.mypy] +python_version = "3.10" +warn_return_any = true +warn_unused_configs = true +check_untyped_defs = true +disallow_untyped_defs = true +disallow_any_generics = true +ignore_missing_imports = true + [tool.coverage.run] relative_files = true diff --git a/sql_metadata/__init__.py b/sql_metadata/__init__.py index cb4048f5..183e47bf 100644 --- a/sql_metadata/__init__.py +++ b/sql_metadata/__init__.py @@ -1,10 +1,22 @@ -""" -Module for parsing sql queries and returning columns, -tables, names 
of with statements etc. +"""Parse SQL queries and extract structural metadata. + +The ``sql-metadata`` package analyses SQL statements and returns the +tables, columns, aliases, CTE definitions, subqueries, values, comments, +and query type they contain. The primary entry point is :class:`Parser`:: + + from sql_metadata import Parser + + parser = Parser("SELECT id, name FROM users WHERE active = 1") + print(parser.tables) # ['users'] + print(parser.columns) # ['id', 'name', 'active'] + +Under the hood the library delegates to `sqlglot `_ +for AST construction and tokenization, with custom dialect handling for +MSSQL, MySQL, Hive/Spark, and TSQL bracket notation. """ -# pylint:disable=unsubscriptable-object -from sql_metadata.parser import Parser +from sql_metadata.exceptions import InvalidQueryDefinition from sql_metadata.keywords_lists import QueryType +from sql_metadata.parser import Parser -__all__ = ["Parser", "QueryType"] +__all__ = ["InvalidQueryDefinition", "Parser", "QueryType"] diff --git a/sql_metadata/ast_parser.py b/sql_metadata/ast_parser.py new file mode 100644 index 00000000..fe33a191 --- /dev/null +++ b/sql_metadata/ast_parser.py @@ -0,0 +1,107 @@ +"""Wrap ``sqlglot.parse()`` to produce an AST from raw SQL strings. + +Thin orchestrator that composes :class:`~sql_cleaner.SqlCleaner` (raw SQL +preprocessing) and :class:`~dialect_parser.DialectParser` (dialect +detection, parsing, quality validation) so that downstream extractors +always receive a clean ``sqlglot.exp.Expression`` tree (or ``None`` / +``ValueError``). +""" + +from sqlglot import exp +from sqlglot.dialects.dialect import DialectType + +from sql_metadata.dialect_parser import DialectParser +from sql_metadata.sql_cleaner import SqlCleaner + + +class ASTParser: + """Lazy wrapper around SQL parsing with dialect auto-detection. + + Instantiated once per :class:`Parser` with the raw SQL string. 
The + actual parsing is deferred until :attr:`ast` is first accessed, at + which point the SQL is cleaned and parsed through one or more sqlglot + dialects until a satisfactory AST is obtained. + + :param sql: Raw SQL query string. + :type sql: str + """ + + def __init__(self, sql: str) -> None: + self._raw_sql = sql + self._ast: exp.Expression | None = None + self._dialect: DialectType = None + self._parsed = False + self._is_replace = False + self._cte_name_map: dict[str, str] = {} + + @property + def ast(self) -> exp.Expression | None: + """The sqlglot AST for the query, lazily parsed on first access. + + :returns: Root AST node, or ``None`` for empty/comment-only queries. + :rtype: exp.Expression + :raises ValueError: If the SQL is malformed and cannot be parsed. + """ + if self._parsed: + return self._ast + self._parsed = True + self._ast = self._parse(self._raw_sql) + return self._ast + + @property + def dialect(self) -> DialectType: + """The sqlglot dialect that produced the current AST. + + Set as a side-effect of :attr:`ast` access. May be ``None`` + (default dialect), a string like ``"mysql"``, or a custom + :class:`Dialect` subclass such as :class:`HashVarDialect`. + """ + _ = self.ast + return self._dialect + + @property + def is_replace(self) -> bool: + """Whether the original query was a ``REPLACE INTO`` statement. + + ``REPLACE INTO`` is rewritten to ``INSERT INTO`` before parsing + (sqlglot otherwise produces an opaque ``Command`` node). This + flag allows :attr:`Parser.query_type` to restore the correct + :class:`QueryType.REPLACE` value. + """ + _ = self.ast + return self._is_replace + + @property + def cte_name_map(self) -> dict[str, str]: + """Map of placeholder CTE names back to their original qualified form. + + Keys are underscore-separated placeholders (``db__DOT__name``), + values are the original dotted names (``db.name``). 
+ """ + _ = self.ast + return self._cte_name_map + + def _parse(self, sql: str) -> exp.Expression | None: + """Parse *sql* into a sqlglot AST. + + Delegates preprocessing to :class:`SqlCleaner` and dialect + detection / parsing to :class:`DialectParser`. + + :param sql: Raw SQL string (may include comments). + :type sql: str + :returns: Root AST node, or ``None`` for empty input. + :rtype: exp.Expression | None + :raises ValueError: If the SQL is malformed. + """ + if not sql or not sql.strip(): + return None + + result = SqlCleaner.clean(sql) + if result.sql is None: + return None + + self._is_replace = result.is_replace + self._cte_name_map = result.cte_name_map + + ast, self._dialect = DialectParser().parse(result.sql) + return ast diff --git a/sql_metadata/column_extractor.py b/sql_metadata/column_extractor.py new file mode 100644 index 00000000..7718a0ef --- /dev/null +++ b/sql_metadata/column_extractor.py @@ -0,0 +1,1087 @@ +"""Single-pass SQL metadata extraction from a sqlglot AST. + +Walks the AST in ``arg_types``-key order (which mirrors the left-to-right +SQL text order) and collects columns, column aliases, CTE names, and +subquery names into a :class:`_Collector` accumulator. The +:class:`ColumnExtractor` class encapsulates the walk and all helper methods, +replacing the earlier flat-function design with a cohesive class. + +The public entry point is :meth:`ColumnExtractor.extract`, which returns an +:class:`ExtractionResult` dataclass consumed by :attr:`Parser.columns` +and friends. 
+""" + +from dataclasses import dataclass +from typing import Any + +from sqlglot import exp + +from sql_metadata.exceptions import InvalidQueryDefinition +from sql_metadata.utils import UniqueList, last_segment + +# --------------------------------------------------------------------------- +# Result dataclass +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class ExtractionResult: + """Immutable container for column extraction results. + + Replaces the earlier 7-tuple return value with named fields. + """ + + columns: UniqueList + columns_dict: dict[str, UniqueList] + alias_names: UniqueList + alias_dict: dict[str, UniqueList] | None + alias_map: dict[str, str | list[str]] + cte_names: UniqueList + subquery_names: UniqueList + output_columns: list[str] + + +# --------------------------------------------------------------------------- +# Clause classification (pure functions, no state) +# --------------------------------------------------------------------------- + + +#: Simple key → clause-name lookup for most ``arg_types`` keys. +_CLAUSE_MAP: dict[str, str] = { + "where": "where", + "group": "group_by", + "order": "order_by", + "having": "having", +} + +#: Keys that map to the ``"join"`` clause section. +_JOIN_KEYS = frozenset({"on", "using"}) + + +def _classify_expressions_clause(parent_type: type) -> str: + """Resolve the clause for an ``"expressions"`` key based on the parent node. + + :param parent_type: The type of the parent AST node. + :returns: ``"update"``, ``"select"``, or ``""`` for other parents. + """ + if parent_type is exp.Update: + return "update" + if parent_type is exp.Select: + return "select" + return "" + + +def _classify_clause(key: str, parent_type: type) -> str: + """Map an ``arg_types`` key and parent node type to a ``columns_dict`` section. + + :param key: The ``arg_types`` key through which the child was reached. + :param parent_type: The type of the parent AST node. 
+ :returns: Section name string, or ``""`` if the key does not map. + """ + if key == "expressions": + return _classify_expressions_clause(parent_type) + if key in _JOIN_KEYS: + return "join" + return _CLAUSE_MAP.get(key, "") + + +# --------------------------------------------------------------------------- +# Pure helpers (no state) +# --------------------------------------------------------------------------- + + +def _dfs(node: exp.Expression) -> Any: + """Yield *node* and all its descendants in depth-first order. + + :param node: Root expression node. + :yields: Each expression node in DFS pre-order. + """ + yield node + for child in node.iter_expressions(): + yield from _dfs(child) + + +#: Functions whose first argument is a date-part unit keyword, not a column. +_DATE_PART_FUNCTIONS = frozenset( + { + "dateadd", + "datediff", + "datepart", + "datename", + "date_add", + "date_sub", + "date_diff", + "date_trunc", + "timestampadd", + "timestampdiff", + } +) + + +def _is_date_part_unit(node: exp.Column) -> bool: + """Return True if *node* is the first arg of a date-part function.""" + parent = node.parent + if ( + isinstance(parent, exp.Anonymous) + and parent.this.lower() in _DATE_PART_FUNCTIONS + ): + exprs = parent.expressions + return len(exprs) > 0 and exprs[0] is node + return False + + +# --------------------------------------------------------------------------- +# Collector — accumulates results during AST walk +# --------------------------------------------------------------------------- + + +class _Collector: + """Mutable accumulator for metadata gathered during the AST walk. + + :param table_aliases: Pre-computed table alias → real name mapping. 
+ """ + + __slots__ = ( + "ta", + "columns", + "columns_dict", + "alias_names", + "alias_dict", + "alias_map", + "cte_names", + "cte_alias_names", + "subquery_items", + "output_columns", + ) + + def __init__(self, table_aliases: dict[str, str]): + self.ta = table_aliases + self.columns = UniqueList() + self.columns_dict: dict[str, UniqueList] = {} + self.alias_names = UniqueList() + self.alias_dict: dict[str, UniqueList] = {} + self.alias_map: dict[str, str | list[str]] = {} + self.cte_names = UniqueList() + self.cte_alias_names: set[str] = set() + self.subquery_items: list[tuple[int, str]] = [] + self.output_columns: list[str] = [] + + def add_column(self, name: str, clause: str) -> None: + """Record a column name, filing it into the appropriate section.""" + self.columns.append(name) + if clause: + self.columns_dict.setdefault(clause, UniqueList()).append(name) + + def add_alias(self, name: str, target: Any, clause: str) -> None: + """Record a column alias and its target expression.""" + self.alias_names.append(name) + if clause: + self.alias_dict.setdefault(clause, UniqueList()).append(name) + if target is not None: + self.alias_map[name] = target + + +# --------------------------------------------------------------------------- +# arg_types keys to skip during the walk. +# --------------------------------------------------------------------------- + +_SKIP_KEYS = frozenset({"conflict", "returning", "alternative"}) + + +# --------------------------------------------------------------------------- +# ColumnExtractor — the main class +# --------------------------------------------------------------------------- + + +class ColumnExtractor: + """Single-pass DFS extraction of columns, aliases, CTEs, and subqueries. + + Walks the AST in ``arg_types``-key order (which mirrors the left-to-right + SQL text order) and collects all metadata into an internal + :class:`_Collector`. Call :meth:`extract` to run the walk and return an + :class:`ExtractionResult`. 
+ + The class is designed around a single public entry point + (:meth:`extract`), which triggers a recursive depth-first traversal of + the sqlglot AST. Specialised handler methods process leaf-like nodes + (columns, aliases, CTEs, subqueries) while the walk engine manages + clause classification and child iteration. + + :param ast: Root sqlglot AST node (e.g. ``Select``, ``Insert``, + ``Create``). + :param table_aliases: Pre-computed mapping of table alias names to + their real (resolved) table names. + :param cte_name_map: Optional mapping of placeholder CTE names + (produced by :class:`SqlCleaner`) back to the original qualified + CTE names. + """ + + def __init__( + self, + ast: exp.Expression, + table_aliases: dict[str, str], + cte_name_map: dict[str, str] | None = None, + ): + self._ast = ast + self._table_aliases = table_aliases + self._cte_name_map = cte_name_map or {} + self._collector = _Collector(table_aliases) + self._reverse_cte_map = self._cte_name_map + + # ------------------------------------------------------------------- + # Public API + # ------------------------------------------------------------------- + + def extract(self) -> ExtractionResult: + """Run the full extraction walk and return an immutable result. + + Orchestrates the three-phase extraction process: + + 1. **Seed** — pre-populate CTE names so downstream handlers can + recognise CTE column-alias references. + 2. **Walk** — depth-first traversal of the AST, dispatching each + node to the appropriate handler. + 3. **Finalise** — restore qualified CTE names, sort subquery + names, and package everything into an :class:`ExtractionResult`. + + For ``CREATE TABLE`` statements without a ``SELECT`` body (pure + DDL), only ``ColumnDef`` nodes are collected during the walk. + + Example SQL:: + + SELECT a, b FROM t WHERE a > 1 + + :returns: An :class:`ExtractionResult` containing columns, + aliases, CTE names, subquery names, and output columns. 
+ """ + c = self._collector + + self._seed_cte_names() + + # Reset cte_names — walk will re-collect them in text order + c.cte_names = UniqueList() + self._walk(self._ast) + + # Restore qualified CTE names (reverse placeholder mapping) + final_cte = UniqueList() + for name in c.cte_names: + final_cte.append(self._reverse_cte_map.get(name, name)) + + alias_dict = c.alias_dict if c.alias_dict else None + return ExtractionResult( + columns=c.columns, + columns_dict=c.columns_dict, + alias_names=c.alias_names, + alias_dict=alias_dict, + alias_map=c.alias_map, + cte_names=final_cte, + subquery_names=self._build_subquery_names(), + output_columns=c.output_columns, + ) + + # ------------------------------------------------------------------- + # Setup helpers + # ------------------------------------------------------------------- + + def _seed_cte_names(self) -> None: + """Pre-populate CTE names in the collector before the main walk. + + Scans the AST for all ``CTE`` nodes and records their alias + names. This allows :meth:`_handle_column` to recognize + references like ``cte_name.col`` as CTE column-alias references + rather than regular columns. + + Example SQL:: + + WITH sales AS (SELECT id FROM orders) SELECT sales.id FROM sales + + The seed step records ``"sales"`` so that ``sales.id`` in the + outer SELECT can be identified as a CTE-qualified reference. + """ + for cte in self._ast.find_all(exp.CTE): + alias = cte.alias + if alias: + self._collector.cte_names.append( + self._reverse_cte_map.get(alias, alias) + ) + + def _build_subquery_names(self) -> UniqueList: + """Sort collected subquery items by depth and return their names. + + Subqueries are collected during the walk with their nesting + depth. This method sorts them innermost-first (descending depth) + and returns a :class:`UniqueList` of alias names in that order. 
+ + Example SQL:: + + SELECT (SELECT 1) AS a, (SELECT 2) AS b FROM t + + :returns: A :class:`UniqueList` of subquery alias names, ordered + from innermost to outermost. + """ + c = self._collector + c.subquery_items.sort(key=lambda x: -x[0]) + names = UniqueList() + for _, name in c.subquery_items: + names.append(name) + return names + + # ------------------------------------------------------------------- + # DFS walk engine + # ------------------------------------------------------------------- + + def _walk( + self, node: exp.Expression, clause: str = "", depth: int = 0 + ) -> None: + """Perform a depth-first walk of the AST in ``arg_types`` key order. + + This is the core recursive method. For each node it first + attempts leaf dispatch via :meth:`_dispatch_leaf`. If the node + is not a leaf, it iterates the node's ``arg_types`` keys in + declaration order (which mirrors SQL text order) and recurses + into each populated child. + + :param node: The current AST node to process. + :param clause: The current SQL clause context (e.g. ``"select"``, + ``"where"``). Propagated to child nodes and used to file + columns into ``columns_dict`` sections. + :param depth: Current nesting depth, used to sort subqueries by + depth (innermost first). + """ + assert node is not None + + if self._dispatch_leaf(node, clause, depth): + return + + if hasattr(node, "arg_types"): + self._walk_children(node, clause, depth) + + def _walk_children(self, node: exp.Expression, clause: str, depth: int) -> None: + """Iterate and recurse into children of *node* in ``arg_types`` key order. + + For each child key, determines the SQL clause context (e.g. + ``"where"`` → ``where``, ``"on"`` → ``join``) via + :func:`_classify_clause`. Special-case keys (SELECT expressions, + INSERT schema, JOIN USING) are routed to dedicated handlers via + :meth:`_process_child_key`; all others get the default recursive + walk via :meth:`_recurse_child`. 
+ + :param node: Parent AST node whose children are being iterated. + :param clause: Inherited clause context from the parent. + :param depth: Current nesting depth. + """ + for key in node.arg_types: + if key in _SKIP_KEYS: + continue + child = node.args.get(key) + if child is None: + continue + + new_clause = _classify_clause(key, type(node)) or clause + + if not self._process_child_key(node, key, child, new_clause, depth): + self._recurse_child(child, new_clause, depth) + + def _dispatch_leaf(self, node: exp.Expression, clause: str, depth: int) -> bool: + """Dispatch leaf-like AST nodes to their specialised handlers. + + Checks if *node* is a terminal or semi-terminal node type that + should be handled directly rather than recursed into. Each + branch delegates to the appropriate handler and returns ``True`` + to stop further recursion, or ``False`` to let the walk continue. + + :param node: The AST node to inspect. + :param clause: Current clause context. + :param depth: Current nesting depth. + :returns: ``True`` if the node was handled (caller should stop + recursion), ``False`` to continue the walk. + """ + if self._is_literal_values_without_subquery(node): + # e.g. INSERT INTO t VALUES (1, 2) — skip literal value lists + return True + if isinstance(node, (exp.Star, exp.ColumnDef, exp.Identifier)): + if isinstance(node, exp.ColumnDef): + # e.g. CREATE TABLE t (col INT) — collect ColumnDef names + self._collector.add_column(node.name, clause) + # Star and Identifier are terminal — no further recursion + return True + if isinstance(node, exp.CTE): + # e.g. WITH cte AS (SELECT ...) — delegate to CTE handler + self._handle_cte(node, depth) + return True + if isinstance(node, exp.Column): + # e.g. SELECT t.col FROM t — delegate to column handler + self._handle_column(node, clause) + return True + if isinstance(node, exp.Subquery) and node.alias: + # e.g. 
SELECT (SELECT 1) AS sub — record named subquery + self._collector.subquery_items.append((depth, node.alias)) + return False + + def _process_child_key( + self, node: exp.Expression, key: str, child: Any, clause: str, depth: int + ) -> bool: + """Route special ``arg_types`` keys to dedicated handlers. + + Intercepts three specific key/parent combinations that need + custom processing instead of the default recursive walk: + + - ``"expressions"`` on a ``SELECT`` — column list with aliases + - ``"this"`` on an ``INSERT`` — schema with target column names + - ``"using"`` on a ``JOIN`` — shared column identifiers + + Example SQL:: + + SELECT a, b AS c FROM t JOIN t2 USING (id) + + :param node: Parent AST node. + :param key: The ``arg_types`` key for the child. + :param child: The child node or list of nodes. + :param clause: Current clause context. + :param depth: Current nesting depth. + :returns: ``True`` if handled by a specialised handler, + ``False`` for default recursive walk. + """ + if key == "expressions" and isinstance(node, exp.Select): + # e.g. SELECT a, b, c — handle the SELECT expression list + self._handle_select_exprs(child, clause, depth) + return True + if isinstance(node, exp.Insert) and key == "this": + # e.g. INSERT INTO t (col1, col2) — extract schema columns + self._handle_insert_schema(node) + return True + if key == "using" and isinstance(node, exp.Join): + # e.g. JOIN t2 USING (id) — extract shared join columns + self._handle_join_using(child) + return True + return False + + def _recurse_child(self, child: Any, clause: str, depth: int) -> None: + """Recursively walk a child value, handling both single nodes and lists. + + This is the default recursion path for ``arg_types`` children + that are not intercepted by :meth:`_process_child_key`. + + :param child: A single :class:`~sqlglot.expressions.Expression` + or a list of expressions. + :param clause: Current clause context to propagate. 
+ :param depth: Current nesting depth (incremented for children). + """ + if isinstance(child, list): + # e.g. GROUP BY a, b — child is a list of Column expressions + for item in child: + if isinstance(item, exp.Expression): + self._walk(item, clause, depth + 1) + elif isinstance(child, exp.Expression): + # e.g. WHERE a > 1 — child is a single expression tree + self._walk(child, clause, depth + 1) + + # ------------------------------------------------------------------- + # Node handlers + # ------------------------------------------------------------------- + + def _handle_select_exprs( + self, exprs: list[exp.Expression], clause: str, depth: int + ) -> None: + """Process the expression list of a SELECT clause. + + Iterates each expression in the SELECT list, dispatching to + the appropriate handler based on node type. Also builds the + ``output_columns`` list which records the projected column + names in their original SELECT order. + + Example SQL:: + + SELECT a, b AS alias, *, COALESCE(c, d) FROM t + + :param exprs: List of expression nodes from ``SELECT.expressions``. + :param clause: Current clause context (typically ``"select"``). + :param depth: Current nesting depth. + """ + assert isinstance(exprs, list) + out = self._collector.output_columns + + for expr in exprs: + if isinstance(expr, exp.Alias): + # e.g. SELECT price * qty AS total + self._handle_alias(expr, clause, depth) + out.append(expr.alias) + elif isinstance(expr, exp.Star): + # e.g. SELECT * + self._collector.add_column("*", clause) + out.append("*") + elif isinstance(expr, exp.Column): + # e.g. SELECT t.col_name + self._handle_column(expr, clause) + out.append(self._column_full_name(expr)) + else: + # e.g. 
SELECT COALESCE(a, b) — function/expression without alias + cols = self._flat_columns(expr) + for col in cols: + self._collector.add_column(col, clause) + out.append(cols[0] if len(cols) == 1 else str(expr)) + + def _handle_alias(self, alias_node: exp.Alias, clause: str, depth: int) -> None: + """Process an ``Alias`` node from a SELECT expression list. + + Handles three cases: + + 1. **Subquery alias** — the alias wraps a subquery (contains a + ``SELECT``). The subquery body is walked recursively, and + the alias target is derived from the subquery's own SELECT + columns. + 2. **Expression alias with columns** — the inner expression + contains one or more column references (e.g. ``a + b AS + total``). Columns are recorded and the alias is mapped to + its source column(s). + 3. **Expression alias without columns** — a literal or star + expression (e.g. ``COUNT(*) AS cnt``). The alias is + recorded with a ``"*"`` or ``None`` target. + + Example SQL:: + + SELECT (SELECT id FROM t) AS sub, a + b AS total, 1 AS one + + :param alias_node: The ``Alias`` AST node. + :param clause: Current clause context. + :param depth: Current nesting depth. + """ + c = self._collector + alias_name = alias_node.alias + inner = alias_node.this + + select = inner.find(exp.Select) + if select: + # Case 1: alias wraps a subquery — e.g. SELECT (SELECT id FROM t) AS sub + self._walk(inner, clause, depth + 1) + target_cols = self._flat_columns_select_only(select) + target = ( + target_cols[0] + if len(target_cols) == 1 + else (target_cols if target_cols else None) + ) + c.add_alias(alias_name, target, clause) + return + + inner_cols = self._flat_columns(inner) + + if inner_cols: + # Case 2: inner expression has column references + # e.g. 
SELECT a + b AS total — record columns a, b + for col in inner_cols: + c.add_column(col, clause) + + unique_inner = UniqueList(inner_cols) + is_self_alias = self._is_self_alias(alias_name, unique_inner) + is_direct = isinstance(inner, exp.Column) + + if is_direct and is_self_alias: + pass # e.g. SELECT col AS col — trivial self-alias, skip + else: + target = None + if not is_self_alias: + # e.g. SELECT a + b AS total → target = ["a", "b"] + target = unique_inner[0] if len(unique_inner) == 1 else unique_inner + c.add_alias(alias_name, target, clause) + else: + # Case 3: no column references — e.g. SELECT COUNT(*) AS cnt + target = None + if inner.find(exp.Star): + # e.g. SELECT * AS all_cols — star target + target = "*" + c.add_alias(alias_name, target, clause) + + def _handle_cte(self, cte: exp.CTE, depth: int) -> None: + """Process a CTE (Common Table Expression) AST node. + + Records the CTE alias as a CTE name. If the CTE declares + explicit column aliases (e.g. ``cte(x, y) AS (...)``), maps + each alias to its corresponding column from the CTE body. + Otherwise, walks the CTE body recursively to extract its + columns normally. + + Example SQL:: + + WITH cte(x, y) AS (SELECT a, b FROM t) SELECT x FROM cte + + :param cte: The ``CTE`` AST node. + :param depth: Current nesting depth. + :raises InvalidQueryDefinition: If the CTE has no alias (invalid SQL). + """ + c = self._collector + alias = cte.alias + if not alias: + raise InvalidQueryDefinition( + "All CTEs require an alias, not a valid SQL" + ) + + c.cte_names.append(alias) + + body = cte.this + + if self._has_cte_explicit_column_definitions(cte): + # e.g. 
WITH stats(total, avg) AS (SELECT SUM(x), AVG(x) FROM t) + table_alias = cte.args.get("alias") + assert table_alias is not None + body_cols = self._flat_columns(body) + real_cols = [x for x in body_cols if x != "*"] + cte_col_names = [col.name for col in table_alias.columns] + + for col in body_cols: + c.add_column(col, "select") + + for i, cte_col in enumerate(cte_col_names): + if i < len(real_cols): + # Map CTE alias to body column by position + target = real_cols[i] + elif "*" in body_cols: + # Body uses SELECT * — map alias to "*" + target = "*" + else: + # More aliases than body columns — no target + target = None + c.add_alias(cte_col, target, "select") + c.cte_alias_names.add(cte_col) + elif self._is_cte_with_query_body(body): + # CTE without column aliases — e.g. WITH cte AS (SELECT a ...) + self._walk(body, "", depth + 1) + + def _handle_insert_schema(self, node: exp.Insert) -> None: + """Extract target column names from the Schema of an INSERT statement. + + Looks for the ``Schema`` node inside the INSERT AST and records + each column identifier as an ``"insert"``-clause column. + + Example SQL:: + + INSERT INTO users (name, email) VALUES ('a', 'b') + + :param node: The ``Insert`` AST node. + """ + schema = node.find(exp.Schema) + if schema and schema.expressions: + for col_id in schema.expressions: + name = col_id.name if hasattr(col_id, "name") else str(col_id) + self._collector.add_column(name, "insert") + + def _handle_join_using(self, child: Any) -> None: + """Extract column identifiers from a ``JOIN ... USING`` clause. + + Iterates the identifier list and records each as a + ``"join"``-clause column. + + Example SQL:: + + SELECT * FROM orders JOIN customers USING (customer_id) + + :param child: The USING clause child — a list of identifier + nodes. + """ + if isinstance(child, list): + # e.g. 
USING (id, name) — child is a list of Identifier nodes + for item in child: + if hasattr(item, "name"): + self._collector.add_column(item.name, "join") + + def _handle_column(self, col: exp.Column, clause: str) -> None: + """Process a ``Column`` AST node during the walk. + + Handles several column forms: + + - **Table-qualified star** — ``t.*`` is recorded as + ``"resolved_table.*"``. + - **CTE column-alias reference** — ``cte.col`` where ``col`` + is a known CTE alias is filed into ``alias_dict`` instead of + ``columns``. + - **Bare alias reference** — a bare name matching a known alias + (e.g. in ``ORDER BY alias``) is filed into ``alias_dict``. + - **Regular column** — everything else is recorded via the + fully-qualified name. + + Example SQL:: + + SELECT t.id, t.*, alias_col FROM t ORDER BY alias_col + + :param col: The ``Column`` AST node. + :param clause: Current clause context. + """ + c = self._collector + + star = col.find(exp.Star) + if star: + # e.g. SELECT t.* — table-qualified star + table = col.table + if table: + table = self._resolve_table_alias(table) + c.add_column(f"{table}.*", clause) + return + + if self._is_cte_column_alias_reference(col): + # e.g. SELECT cte.x — CTE column alias reference + c.alias_dict.setdefault(clause, UniqueList()).append(col.name) + return + + full = self._column_full_name(col) + + unqualified = col.name + if self._is_unqualified_alias_reference(col): + # e.g. ORDER BY alias_name — name matches a known alias + c.alias_dict.setdefault(clause, UniqueList()).append(unqualified) + return + + # e.g. SELECT t.col — regular column, no alias match + c.add_column(full, clause) + + # ------------------------------------------------------------------- + # Column name resolution + # ------------------------------------------------------------------- + + def _resolve_table_alias(self, col_table: str) -> str: + """Replace a table alias with the real table name if mapped. 
+ + Looks up *col_table* in the pre-computed ``table_aliases`` dict. + If found, returns the resolved real table name; otherwise + returns the input unchanged. + + Example:: + + # Given table_aliases = {"t": "users"} + _resolve_table_alias("t") # → "users" + + :param col_table: A table name or alias string. + :returns: The resolved table name, or *col_table* if no mapping + exists. + """ + return self._table_aliases.get(col_table, col_table) + + def _column_full_name(self, col: exp.Column) -> str: + """Build a dot-separated fully-qualified column name. + + Resolves the table alias portion (if present) and assembles + the name from up to four parts: ``catalog.db.table.column``. + Trailing ``#`` characters are stripped from the column name + (used by some dialects for temp-table markers). + + Example SQL:: + + SELECT catalog.schema.t.col FROM t + + :param col: A ``Column`` AST node. + :returns: The fully-qualified column name string + (e.g. ``"users.name"``). + """ + name = col.name.rstrip("#") + table = col.table + db = col.args.get("db") + catalog = col.args.get("catalog") + + if table: + # e.g. SELECT t.col — table-qualified column + resolved = self._resolve_table_alias(table) + parts = [] + if catalog: + # e.g. SELECT catalog.schema.t.col — has catalog prefix + parts.append( + catalog.name if isinstance(catalog, exp.Expression) else catalog + ) + if db: + # e.g. SELECT schema.t.col — has db/schema prefix + parts.append(db.name if isinstance(db, exp.Expression) else db) + parts.append(resolved) + parts.append(name) + return ".".join(parts) + # e.g. SELECT col — bare column name without table qualifier + return name + + @staticmethod + def _is_star_inside_function(star: exp.Star) -> bool: + """Check whether a ``*`` node sits inside a function call. + + Uses sqlglot's ``find_ancestor`` to walk the parent chain and + look for ``Func`` (built-in functions) or ``Anonymous`` + (user-defined function) nodes. 
A star inside a function like + ``COUNT(*)`` should not be recorded as a standalone column. + + Example SQL:: + + SELECT COUNT(*) FROM t + + :param star: A ``Star`` AST node. + :returns: ``True`` if the star is inside a function call. + """ + return star.find_ancestor(exp.Func, exp.Anonymous) is not None + + # ------------------------------------------------------------------- + # Predicate helpers + # ------------------------------------------------------------------- + + @staticmethod + def _is_literal_values_without_subquery( + node: exp.Expression, + ) -> bool: + """Check whether *node* is a VALUES clause with only literal values. + + Returns ``True`` for plain ``VALUES (1, 2), (3, 4)`` rows and + ``False`` when the VALUES clause contains a subquery + (``VALUES (SELECT ...)``). Literal value lists are skipped + during the walk because they contain no column references. + + Example SQL:: + + INSERT INTO t VALUES (1, 2) -- True + INSERT INTO t VALUES (SELECT x ...) -- False + + :param node: An AST node to test. + :returns: ``True`` if the node is a literal-only VALUES clause. + """ + return isinstance(node, exp.Values) and not node.find( + exp.Select + ) + + def _is_cte_column_alias_reference( + self, col: exp.Column + ) -> bool: + """Check whether *col* references a known CTE column alias. + + Returns ``True`` when the column is table-qualified with a CTE + name and the column name matches one of the CTE's declared + column aliases (recorded during CTE processing). + + Example SQL:: + + WITH cte AS (...) SELECT cte.x -- True when x is a CTE alias + + :param col: A ``Column`` AST node. + :returns: ``True`` if this is a CTE column-alias reference. + """ + c = self._collector + return bool( + col.table + and col.table in c.cte_names + and col.name in c.cte_alias_names + ) + + def _is_unqualified_alias_reference( + self, col: exp.Column + ) -> bool: + """Check whether *col* is an unqualified reference to a known alias. 
+ + Returns ``True`` when the column has no table qualifier and its + name matches a previously recorded column alias. This typically + occurs in ``ORDER BY``, ``GROUP BY``, or ``HAVING`` clauses + that reference a SELECT alias by name. + + Example SQL:: + + SELECT a AS x ... ORDER BY x -- True (x has no table qualifier) + + :param col: A ``Column`` AST node. + :returns: ``True`` if this is an unqualified alias reference. + """ + c = self._collector + return not col.table and col.name in c.alias_names + + @staticmethod + def _is_self_alias( + alias_name: str, unique_inner: UniqueList + ) -> bool: + """Check whether an alias maps back to itself. + + Returns ``True`` when the alias name is identical to the single + source column (either exactly or by last segment for + table-qualified columns). Self-aliases like + ``SELECT col AS col`` are not recorded as meaningful aliases. + + Example SQL:: + + SELECT col AS col -- True (exact match) + SELECT t.col AS col -- True (last_segment match) + SELECT a + b AS total -- False + + :param alias_name: The alias string. + :param unique_inner: Deduplicated list of source column names. + :returns: ``True`` if the alias is a trivial self-reference. + """ + return len(unique_inner) == 1 and ( + unique_inner[0] == alias_name + or last_segment(unique_inner[0]) == alias_name + ) + + @staticmethod + def _is_standalone_star( + child: exp.Star, seen_stars: set[int] + ) -> bool: + """Check whether a star node is standalone (not consumed by a Column). + + Returns ``True`` when the star has not already been accounted + for by a parent ``Column`` node (e.g. ``t.*``) and is not + directly nested inside a ``Column``. Stars inside functions + like ``COUNT(*)`` are filtered separately by + :meth:`_is_star_inside_function`. + + Example SQL:: + + SELECT * FROM t -- True + SELECT t.* FROM t -- False (consumed by Column parent) + + :param child: A ``Star`` AST node. 
+ :param seen_stars: Set of ``id()`` values for stars already + consumed by a parent ``Column`` node. + :returns: ``True`` if this is a standalone star. + """ + return id(child) not in seen_stars and not isinstance( + child.parent, exp.Column + ) + + @staticmethod + def _has_cte_explicit_column_definitions( + cte: exp.CTE, + ) -> bool: + """Check whether a CTE declares explicit column aliases. + + Returns ``True`` when the CTE has a column definition list in + its signature (e.g. ``cte(x, y)``) and the CTE body is a + ``SELECT`` statement. + + Example SQL:: + + WITH stats(total, avg) AS (SELECT SUM(x), AVG(x) FROM t) -- True + WITH cte AS (SELECT a FROM t) -- False + + :param cte: A ``CTE`` AST node. + :returns: ``True`` if the CTE has explicit column definitions. + """ + table_alias = cte.args.get("alias") + return bool( + table_alias + and table_alias.columns + and cte.this + and isinstance(cte.this, exp.Select) + ) + + @staticmethod + def _is_cte_with_query_body( + body: exp.Expression, + ) -> bool: + """Check whether a CTE body is a walkable query statement. + + Returns ``True`` for standard SQL query bodies (SELECT, UNION, + INTERSECT, EXCEPT) and ``False`` for scalar expression bodies + used by some dialects (e.g. ClickHouse's + ``WITH '2019-08-01' AS ts`` where the body is a Literal, + or ``WITH 1 + 2 AS val`` where the body is an Add). + + :param body: The ``this`` child of a CTE node. + :returns: ``True`` if the body is a query that should be walked. + """ + return isinstance( + body, (exp.Select, exp.Union, exp.Intersect, exp.Except) + ) + + # ------------------------------------------------------------------- + # Flat column extraction + # ------------------------------------------------------------------- + + def _flat_columns_select_only(self, select: exp.Select) -> list[str]: + """Extract column/alias names from a SELECT's immediate expressions. 
+ + Unlike :meth:`_flat_columns`, this does not recurse into the + full AST subtree — it only inspects the top-level expressions + of a SELECT clause. Used by :meth:`_handle_alias` to determine + the alias target for subquery aliases. + + Example SQL:: + + SELECT a, b AS alias, * FROM t + + :param select: A ``Select`` AST node. + :returns: A list of column name / alias name strings in SELECT + order. + """ + cols = [] + for expr in select.expressions or []: + if isinstance(expr, exp.Alias): + # e.g. SELECT b AS alias — use the alias name + cols.append(expr.alias) + elif isinstance(expr, exp.Column): + # e.g. SELECT a — use the fully-qualified column name + cols.append(self._column_full_name(expr)) + elif isinstance(expr, exp.Star): + # e.g. SELECT * — literal star + cols.append("*") + else: + # e.g. SELECT COALESCE(a, b) — extract columns from expression + for col_name in self._flat_columns(expr): + cols.append(col_name) + return cols + + def _flat_columns(self, node: exp.Expression) -> list[str]: + """Extract all column names from an expression subtree via DFS. + + Performs a full depth-first traversal of *node* using + :func:`_dfs` and collects every ``Column`` and standalone + ``Star`` reference found. Tracks already-seen star nodes to + avoid double-counting table-qualified stars (e.g. ``t.*`` + produces both a ``Column`` and a nested ``Star``). + + Example SQL:: + + COALESCE(t.a, b, c) + + :param node: Root expression node to scan. + :returns: A list of column name strings in DFS encounter order. + """ + assert node is not None + cols = [] + seen_stars: set[int] = set() + for child in _dfs(node): + name = self._collect_column_from_node(child, seen_stars) + if name is not None: + cols.append(name) + return cols + + def _collect_column_from_node( + self, child: exp.Expression, seen_stars: set[int] + ) -> str | None: + """Extract a column name from a single DFS-visited node. + + Called by :meth:`_flat_columns` for each node in the traversal. 
+ Handles ``Column`` nodes (resolving table aliases and skipping + date-part unit keywords) and standalone ``Star`` nodes (skipping + stars inside functions like ``COUNT(*)``). + + Example SQL:: + + DATEDIFF(day, start_date, end_date) + + In this example, ``day`` is a date-part unit keyword and should + be skipped, while ``start_date`` and ``end_date`` are real + columns. + + :param child: A single AST node from the DFS traversal. + :param seen_stars: Set of ``id()`` values for ``Star`` nodes + already consumed by a parent ``Column`` (e.g. ``t.*``). + :returns: The column name string, or ``None`` if the node is + not a column reference. + """ + if isinstance(child, exp.Column): + # e.g. SELECT t.col, DATEDIFF(day, a, b) + if _is_date_part_unit(child): + # e.g. DATEDIFF(day, ...) — "day" is a unit keyword, not a column + return None + star = child.find(exp.Star) + if star: + # e.g. SELECT t.* — table-qualified star within a Column node + seen_stars.add(id(star)) + table = child.table + if table: + table = self._resolve_table_alias(table) + return f"{table}.*" + return self._column_full_name(child) # e.g. SELECT t.col + if isinstance(child, exp.Star): + # e.g. SELECT * — standalone star (not inside a Column node) + if self._is_standalone_star(child, seen_stars): + if not self._is_star_inside_function(child): + # e.g. SELECT * FROM t — standalone star, not COUNT(*) + return "*" + return None diff --git a/sql_metadata/comments.py b/sql_metadata/comments.py new file mode 100644 index 00000000..835e7736 --- /dev/null +++ b/sql_metadata/comments.py @@ -0,0 +1,188 @@ +"""Extract and strip SQL comments using the sqlglot tokenizer. + +sqlglot's tokenizer skips comments during tokenization, which means +comments live in the *gaps* between consecutive token positions. This +module exploits that property: it tokenizes the SQL, then scans each gap +for comment delimiters (``--``, ``/* */``, ``#``). 
+ +Two public entry points exist: + +* :func:`extract_comments` — returns the raw comment texts (delimiters + included) for inspection or logging. +* :func:`strip_comments` — returns the SQL with all comments removed and + whitespace normalised, used by :class:`Parser` for the ``without_comments`` + property. + +A third, internal variant :func:`strip_comments_for_parsing` is consumed +by :mod:`_ast` before handing SQL to ``sqlglot.parse()``; it always uses +the MySQL tokenizer so that ``#``-style comments are reliably stripped. +""" + +import re +from typing import Any + +from sqlglot.tokens import Tokenizer + + +def _choose_tokenizer(sql: str) -> Tokenizer: + """Select the appropriate sqlglot tokenizer for *sql*. + + The default sqlglot tokenizer does **not** treat ``#`` as a comment + delimiter, but MySQL does. When ``#`` appears in the SQL and is used + as a comment (not as a variable/template prefix), we switch to the + MySQL tokenizer so that ``#``-style comments are properly skipped. + + :param sql: Raw SQL string to inspect. + :type sql: str + :returns: An instantiated tokenizer (MySQL or default). + :rtype: sqlglot.tokens.Tokenizer + """ + if "#" in sql and not _has_hash_variables(sql): + from sqlglot.dialects.mysql import MySQL + + return MySQL.Tokenizer() + return Tokenizer() + + +def _has_hash_variables(sql: str) -> bool: + """Determine whether ``#`` characters in *sql* are variable references. + + MSSQL uses ``#table`` for temporary tables and some template engines + use ``#VAR#`` placeholders. This function distinguishes those from + MySQL-style ``# comment`` lines so that :func:`_choose_tokenizer` + picks the right dialect. + + Heuristics (checked via regex): + + * ``#WORD#`` — bracketed template variable. + * ``= #WORD`` or ``(#WORD`` — assignment / parameter context. + + :param sql: Raw SQL string. + :type sql: str + :returns: ``True`` if at least one ``#`` looks like a variable prefix. + :rtype: bool + """ + # #WORD# template variable (e.g. 
#VAR#) + if re.search(r"#\w+#", sql): + return True + # = #WORD or (#WORD with optional whitespace before # + if re.search(r"[=(]\s*#\w", sql): + return True + return False + + +def extract_comments(sql: str) -> list[str]: + """Return all comments found in *sql*, with delimiters preserved. + + Tokenizes the SQL, then scans every gap between consecutive token + positions for comment delimiters. Returned strings include the + opening delimiter (``--``, ``/*``, ``#``) and, for block comments, + the closing ``*/``. + + Called by :attr:`Parser.comments`. + + :param sql: Raw SQL string. + :type sql: str + :returns: List of comment strings in source order. + :rtype: List[str] + """ + if not sql: + return [] + try: + tokens = list(_choose_tokenizer(sql).tokenize(sql)) + # TODO: revisit if sqlglot tokenizer starts raising on specific inputs + except Exception: # pragma: no cover + return [] + comments: list[str] = [] + prev_end = -1 + for tok in tokens: + _scan_gap(sql, prev_end + 1, tok.start, comments) + prev_end = tok.end + _scan_gap(sql, prev_end + 1, len(sql), comments) + return comments + + +#: Matches all three SQL comment styles in a single pass: +#: ``/* ... */`` (block, possibly unterminated), ``-- ...``, and ``# ...``. +_COMMENT_RE = re.compile(r"/\*.*?\*/|/\*.*$|--[^\n]*\n?|#[^\n]*\n?", re.DOTALL) + + +def _scan_gap(sql: str, start: int, end: int, out: list[str]) -> None: + """Scan a slice of *sql* for comment delimiters and append matches. + + :param sql: The full SQL string (not just the gap). + :param start: Start index of the gap to scan. + :param end: End index (exclusive) of the gap. + :param out: Mutable list to which discovered comment strings are appended. 
+ """ + out.extend(_COMMENT_RE.findall(sql[start:end])) + + +def _reconstruct_from_tokens(sql: str, tokens: list[Any]) -> str: + """Rebuild SQL from token spans, collapsing gaps to single spaces.""" + if not tokens: + return "" + parts = [sql[tokens[0].start : tokens[0].end + 1]] + for i in range(1, len(tokens)): + if tokens[i].start > tokens[i - 1].end + 1: + parts.append(" ") + parts.append(sql[tokens[i].start : tokens[i].end + 1]) + return "".join(parts).strip() + + +def strip_comments_for_parsing(sql: str) -> str: + """Strip **all** comments — including ``#`` lines — for sqlglot parsing. + + Unlike :func:`strip_comments`, this always uses the MySQL tokenizer + (which treats ``#`` as a comment delimiter) so that hash-style + comments are removed before ``sqlglot.parse()`` sees the SQL. The + only exceptions are ``CREATE FUNCTION`` bodies (which may contain + ``#`` in procedural code) and MSSQL ``#temp`` table references. + + Called exclusively by :meth:`ASTParser._parse` in ``_ast.py``. + + :param sql: Raw SQL string. + :type sql: str + :returns: SQL with all comments removed and whitespace collapsed. + :rtype: str + """ + if not sql: + return sql or "" + # Skip MySQL tokenizer when # is used as variable (not comment) + upper = sql.strip().upper() + if upper.startswith("CREATE FUNCTION") or _has_hash_variables(sql): + tokenizer = Tokenizer() + else: + from sqlglot.dialects.mysql import MySQL + + tokenizer = MySQL.Tokenizer() + try: + tokens = list(tokenizer.tokenize(sql)) + except Exception: + return sql.strip() + return _reconstruct_from_tokens(sql, tokens) + + +def strip_comments(sql: str) -> str: + """Remove comments and normalise whitespace, preserving ``#VAR`` references. + + Reconstructs the SQL from its token spans, inserting a single space + wherever a gap (comment or extra whitespace) existed between two + tokens. Uses :func:`_choose_tokenizer` so that ``#VAR`` template + variables in MSSQL queries are kept intact. 
+ + Called by :attr:`Parser.without_comments` and + :attr:`Generalizator.without_comments`. + + :param sql: Raw SQL string. + :type sql: str + :returns: SQL with comments removed and whitespace normalised. + :rtype: str + """ + if not sql: + return sql or "" + try: + tokens = list(_choose_tokenizer(sql).tokenize(sql)) + except Exception: + return sql.strip() + return _reconstruct_from_tokens(sql, tokens) diff --git a/sql_metadata/compat.py b/sql_metadata/compat.py deleted file mode 100644 index 88eea38e..00000000 --- a/sql_metadata/compat.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -This module provides a temporary compatibility layer -for legacy API dating back to 1.x version. - -Change your old imports: - -from sql_metadata import get_query_columns, get_query_tables - -into: - -from sql_metadata.compat import get_query_columns, get_query_tables - -""" - -# pylint:disable=missing-function-docstring -from typing import List, Optional, Tuple - -import sqlparse -from sqlparse.sql import TokenList -from sqlparse.tokens import Whitespace - -from sql_metadata import Parser - - -def preprocess_query(query: str) -> str: - return Parser(query).query - - -def get_query_tokens(query: str) -> List[sqlparse.sql.Token]: - query = preprocess_query(query) - parsed = sqlparse.parse(query) - - # handle empty queries (#12) - if not parsed: - return [] - - tokens = TokenList(parsed[0].tokens).flatten() - - return [token for token in tokens if token.ttype is not Whitespace] - - -def get_query_columns(query: str) -> List[str]: - return Parser(query).columns - - -def get_query_tables(query: str) -> List[str]: - return Parser(query).tables - - -def get_query_limit_and_offset(query: str) -> Optional[Tuple[int, int]]: - return Parser(query).limit_and_offset - - -def generalize_sql(query: Optional[str] = None) -> Optional[str]: - if query is None: - return None - - return Parser(query).generalize diff --git a/sql_metadata/dialect_parser.py b/sql_metadata/dialect_parser.py new file mode 100644 
index 00000000..848fc365 --- /dev/null +++ b/sql_metadata/dialect_parser.py @@ -0,0 +1,235 @@ +"""SQL dialect detection, parsing, and parse-quality validation. + +Combines dialect heuristics (which sqlglot dialect to try), the actual +``sqlglot.parse()`` call, and degraded-result detection into a single +class so that callers only need to call :meth:`DialectParser.parse`. +""" + +import logging +from typing import Any + +import sqlglot +from sqlglot import exp +from sqlglot.dialects.dialect import Dialect, DialectType +from sqlglot.dialects.redshift import Redshift +from sqlglot.dialects.tsql import TSQL +from sqlglot.errors import ParseError, TokenError +from sqlglot.parsers.redshift import RedshiftParser +from sqlglot.tokens import Tokenizer as BaseTokenizer + +from sql_metadata.comments import _has_hash_variables +from sql_metadata.exceptions import InvalidQueryDefinition + +#: Table names that indicate a degraded parse result. +_BAD_TABLE_NAMES = frozenset({"IGNORE", ""}) + +#: SQL keywords that should not appear as bare column names. +_BAD_COLUMN_NAMES = frozenset({"UNIQUE", "DISTINCT", "SELECT", "FROM", "WHERE"}) + + +# --------------------------------------------------------------------------- +# Custom dialect classes +# --------------------------------------------------------------------------- + + +class HashVarDialect(Dialect): + """Custom sqlglot dialect that treats ``#WORD`` as identifiers. + + MSSQL uses ``#`` to prefix temporary table names (e.g. ``#temp``) + and some template engines use ``#VAR#`` placeholders. The default + sqlglot tokenizer treats ``#`` as an unknown single-character token; + this dialect moves it into ``VAR_SINGLE_TOKENS`` so it becomes part + of a ``VAR`` token instead. 
+ """ + + class Tokenizer(BaseTokenizer): + """Tokenizer subclass that includes ``#`` in variable tokens.""" + + SINGLE_TOKENS = {**BaseTokenizer.SINGLE_TOKENS} + SINGLE_TOKENS.pop("#", None) + VAR_SINGLE_TOKENS = {*BaseTokenizer.VAR_SINGLE_TOKENS, "#"} + + +class _RedshiftAppendParser(RedshiftParser): + """Redshift parser extended with ``ALTER TABLE ... APPEND FROM``.""" + + def _parse_alter_table_append(self) -> "exp.Expr | None": + self._match_text_seq("FROM") + return self._parse_table() + + ALTER_PARSERS = { + **RedshiftParser.ALTER_PARSERS, + "APPEND": lambda self: self._parse_alter_table_append(), + } + + +class RedshiftAppendDialect(Redshift): + """Redshift dialect extended with ``ALTER TABLE ... APPEND FROM`` support. + + Redshift's ``APPEND FROM`` syntax is not natively supported by sqlglot, + which causes the statement to degrade to ``exp.Command``. This dialect + adds an ``APPEND`` entry to ``ALTER_PARSERS`` so the statement is parsed + as a proper ``exp.Alter`` with ``exp.Table`` nodes. + """ + + Parser = _RedshiftAppendParser + + +class BracketedTableDialect(TSQL): + """TSQL dialect for queries containing ``[bracketed]`` identifiers. + + sqlglot's TSQL dialect correctly interprets square-bracket quoting, + which the default dialect does not. This thin subclass exists so + that ``TableExtractor`` can ``isinstance``-check to enable + bracket-preserving table name construction. + """ + + +# --------------------------------------------------------------------------- +# DialectParser +# --------------------------------------------------------------------------- + + +class DialectParser: + """Detect the appropriate sqlglot dialect and parse SQL into an AST.""" + + def parse(self, clean_sql: str) -> tuple[exp.Expression, DialectType]: + """Parse *clean_sql*, returning ``(ast, dialect)``. + + Detects candidate dialects via heuristics, tries each in order, + and returns the first non-degraded result. 
+ + :param clean_sql: Preprocessed SQL string (comments stripped, etc.). + :type clean_sql: str + :returns: 2-tuple of ``(ast_node, winning_dialect)``. + :rtype: tuple + :raises ValueError: If all dialect attempts fail. + """ + dialects = self._detect_dialects(clean_sql) + return self._try_dialects(clean_sql, dialects) + + # -- dialect detection -------------------------------------------------- + + @staticmethod + def _detect_dialects(sql: str) -> list[Any]: + """Choose an ordered list of sqlglot dialects to try for *sql*. + + Heuristics: + + * ``#WORD`` → :class:`HashVarDialect` (MSSQL temp tables). + * Back-ticks → ``"mysql"``. + * Square brackets or ``TOP`` → :class:`BracketedTableDialect`. + * ``UNIQUE`` → try default, MySQL, Oracle. + * ``LATERAL VIEW`` → ``"spark"`` (Hive). + + :param sql: Cleaned SQL string. + :type sql: str + :returns: Ordered list of dialects to attempt. + :rtype: list + """ + upper = sql.upper() + if _has_hash_variables(sql): + return [HashVarDialect, None, "mysql"] + if "`" in sql: + return ["mysql", None] + if "LATERAL VIEW" in upper: + return ["spark", None, "mysql"] + if "[" in sql or " TOP " in upper: + return [BracketedTableDialect, None, "mysql"] + if " UNIQUE " in upper: + return [None, "mysql", "oracle"] + if "APPEND FROM" in upper: + return [RedshiftAppendDialect, None, "mysql"] + return [None, "mysql"] + + # -- parsing ------------------------------------------------------------ + + def _try_dialects( + self, clean_sql: str, dialects: list[Any] + ) -> tuple[exp.Expression, DialectType]: + """Try parsing *clean_sql* with each dialect, returning the best. + + :returns: 2-tuple of ``(ast_node, winning_dialect)``. + :raises ValueError: If all dialect attempts fail. 
+ """ + last_result = None + winning_dialect = None + for dialect in dialects: + try: + result = self._parse_with_dialect(clean_sql, dialect) + if result is None: + continue + last_result = result + winning_dialect = dialect + is_last = dialect == dialects[-1] + if not is_last and self._is_degraded(result, clean_sql): + continue + return result, dialect + except (ParseError, TokenError): + if dialect is not None and dialect == dialects[-1]: + raise InvalidQueryDefinition( + "Query could not be parsed — SQL syntax error" + ) + continue + + # TODO: revisit if sqlglot starts returning None from parse for last dialect + if last_result is not None: # pragma: no cover + return last_result, winning_dialect + raise InvalidQueryDefinition( + "Query could not be parsed — no dialect could handle this SQL" + ) + + @staticmethod + def _parse_with_dialect(clean_sql: str, dialect: Any) -> exp.Expression | None: + """Parse *clean_sql* with a single dialect, suppressing warnings.""" + logger = logging.getLogger("sqlglot") + old_level = logger.level + logger.setLevel(logging.CRITICAL) + try: + results = sqlglot.parse( + clean_sql, + dialect=dialect, + error_level=sqlglot.ErrorLevel.WARN, + ) + finally: + logger.setLevel(old_level) + + if not results or results[0] is None: + return None + result = results[0] + assert result is not None # guaranteed by check above + # TODO: revisit if sqlglot returns top-level Subquery + if isinstance(result, exp.Subquery) and not result.alias: # pragma: no cover + inner = result.this + if isinstance(inner, exp.Expression): + return inner + return result # type: ignore[return-value] + + # -- quality checks ----------------------------------------------------- + + def _is_degraded(self, result: exp.Expression, clean_sql: str) -> bool: + """Return ``True`` when a better dialect should be tried.""" + if isinstance(result, exp.Command) and not self._is_expected_command(clean_sql): + return True + return self._has_parse_issues(result) + + @staticmethod + 
def _is_expected_command(sql: str) -> bool: + """Check whether *sql* legitimately parses as ``exp.Command``.""" + upper = sql.strip().upper() + return upper.startswith("CREATE FUNCTION") + + @staticmethod + def _has_parse_issues(ast: exp.Expression) -> bool: + """Detect signs of a degraded or incorrect parse. + + Checks for table nodes with empty/keyword-like names and column + nodes whose name is a SQL keyword without a table qualifier. + """ + for table in ast.find_all(exp.Table): + if table.name in _BAD_TABLE_NAMES: + return True + for col in ast.find_all(exp.Column): + if col.name.upper() in _BAD_COLUMN_NAMES and not col.table: + return True + return False diff --git a/sql_metadata/exceptions.py b/sql_metadata/exceptions.py new file mode 100644 index 00000000..c698b370 --- /dev/null +++ b/sql_metadata/exceptions.py @@ -0,0 +1,5 @@ +"""Custom exceptions for the sql-metadata package.""" + + +class InvalidQueryDefinition(ValueError): + """Raised when the SQL query is structurally invalid or unsupported.""" diff --git a/sql_metadata/generalizator.py b/sql_metadata/generalizator.py index 97eb35d1..f0639517 100644 --- a/sql_metadata/generalizator.py +++ b/sql_metadata/generalizator.py @@ -1,26 +1,52 @@ -""" -Module used to produce generalized sql out of given query +"""Produce a generalised (anonymised) version of a SQL query. + +Replaces string literals with ``X``, numbers with ``N``, and +multi-value ``IN (...)`` / ``VALUES (...)`` lists with ``(XYZ)`` so +that structurally identical queries can be grouped for analysis +(e.g. slow-query log aggregation). Based on MediaWiki's +``DatabaseBase::generalizeSQL``. """ import re -import sqlparse + +from sql_metadata.comments import strip_comments class Generalizator: - """ - Class used to produce generalized sql out of given query + """Produce a generalised form of a SQL query. + + Strips comments, removes string literals and numeric values, and + collapses repeated ``LIKE`` / ``IN`` / ``VALUES`` clauses. 
Designed + for grouping structurally identical queries in monitoring and logging + pipelines. + + Used by :attr:`Parser.generalize`, which delegates to + :attr:`Generalizator.generalize`. + + :param sql: Raw SQL query string to generalise. + :type sql: str """ def __init__(self, sql: str = ""): + """Initialise with the raw SQL string. + + :param sql: SQL query to generalise. + :type sql: str + """ self._raw_query = sql # SQL queries normalization (#16) @staticmethod def _normalize_likes(sql: str) -> str: - """ - Normalize and wrap LIKE statements + """Normalise and collapse repeated ``LIKE`` clauses. + + Strips ``%`` wildcards, replaces ``LIKE '...'`` with ``LIKE X``, + and collapses consecutive ``or/and ... LIKE X`` clauses into a + single instance with ``...`` suffix. - :type sql str + :param sql: SQL string with LIKE clauses. + :type sql: str + :returns: SQL with LIKE clauses normalised. :rtype: str """ sql = sql.replace("%", "") @@ -29,11 +55,11 @@ def _normalize_likes(sql: str) -> str: sql = re.sub(r"LIKE '[^\']+'", "LIKE X", sql) # or all_groups LIKE X or all_groups LIKE X - matches = re.finditer(r"(or|and) [^\s]+ LIKE X", sql, flags=re.IGNORECASE) - matches = [match.group(0) for match in matches] if matches else None + found = re.finditer(r"(or|and) [^\s]+ LIKE X", sql, flags=re.IGNORECASE) + like_matches = [m.group(0) for m in found] - if matches: - for match in set(matches): + if like_matches: + for match in set(like_matches): sql = re.sub( r"(\s?" + re.escape(match) + ")+", " " + match + " ...", sql ) @@ -42,23 +68,33 @@ def _normalize_likes(sql: str) -> str: @property def without_comments(self) -> str: - """ - Removes comments from SQL query + """Return the SQL with all comments removed. + Delegates to :func:`strip_comments` from ``_comments.py``. + + :returns: Comment-free SQL string. 
:rtype: str """ - sql = sqlparse.format(self._raw_query, strip_comments=True) - sql = sql.replace("\n", " ") - sql = re.sub(r"[ \t]+", " ", sql) - return sql + return strip_comments(self._raw_query) @property def generalize(self) -> str: - """ - Removes most variables from an SQL query - and replaces them with X or N for numbers. + """Return a generalised version of the SQL query. + + Applies the following transformations in order: - Based on Mediawiki's DatabaseBase::generalizeSQL + 1. Strip comments. + 2. Remove double-quotes. + 3. Collapse multiple spaces. + 4. Normalise ``LIKE`` clauses. + 5. Replace escaped characters. + 6. Replace string literals with ``X``. + 7. Collapse whitespace to single spaces. + 8. Replace numbers with ``N``. + 9. Collapse ``IN (...)`` / ``VALUES (...)`` lists to ``(XYZ)``. + + :returns: Generalised SQL string, or ``""`` for empty input. + :rtype: str """ if self._raw_query == "": return "" diff --git a/sql_metadata/keywords_lists.py b/sql_metadata/keywords_lists.py index f086287a..4e4fbc66 100644 --- a/sql_metadata/keywords_lists.py +++ b/sql_metadata/keywords_lists.py @@ -1,11 +1,18 @@ -""" -Module provide lists of sql keywords that should trigger or skip -checks for tables an columns +"""SQL keyword sets and enums used to classify tokens and query types. + +Defines the canonical sets of normalised SQL keywords that the token-based +parser (``token.py``) and the AST-based extractors use to decide when a +token is relevant (e.g. precedes a column or table reference) and to map +query prefixes to :class:`QueryType` values. Keyword values are stored +**without spaces** (``INNERJOIN``, ``ORDERBY``) because the tokeniser +strips whitespace before comparison. """ -# these keywords are followed by columns reference from enum import Enum +#: Normalised keywords after which the next token(s) are column references. 
+#: Used by the token-linked-list walker and by ``COLUMNS_SECTIONS`` to +#: decide which ``columns_dict`` section a column belongs to. KEYWORDS_BEFORE_COLUMNS = { "SELECT", "WHERE", @@ -17,7 +24,9 @@ "USING", } -# normalized list of table preceding keywords +#: Normalised keywords after which the next token is a **table** name. +#: Includes all JOIN variants (whitespace-stripped) as well as INTO, +#: UPDATE, TABLE, and the DDL guard ``IFNOTEXISTS``. TABLE_ADJUSTMENT_KEYWORDS = { "FROM", "JOIN", @@ -36,10 +45,14 @@ "IFNOTEXISTS", } -# next statement beginning after with statement +#: Keywords that signal the end of a ``WITH`` (CTE) block and the start +#: of the main statement body. Used by the legacy token-based WITH parser +#: and referenced in ``_ast.py`` for malformed-query detection. WITH_ENDING_KEYWORDS = {"UPDATE", "SELECT", "DELETE", "REPLACE", "INSERT"} -# subquery preceding keywords +#: Keywords that can appear immediately before a parenthesised subquery +#: in a FROM/JOIN position. A subset of ``TABLE_ADJUSTMENT_KEYWORDS`` +#: excluding DML-only entries (INTO, UPDATE, TABLE). SUBQUERY_PRECEDING_KEYWORDS = { "FROM", "JOIN", @@ -54,8 +67,10 @@ "NATURALJOIN", } -# section of a query in which column can exists -# based on last normalized keyword +#: Maps a normalised keyword to the ``columns_dict`` section name that +#: columns following it belong to. For example, columns after ``SELECT`` +#: go into the ``"select"`` section, columns after ``ON``/``USING`` go +#: into ``"join"``. COLUMNS_SECTIONS = { "SELECT": "select", "WHERE": "where", @@ -71,8 +86,11 @@ class QueryType(str, Enum): - """ - Types of supported queries + """Enumeration of SQL statement types recognised by the parser. + + Inherits from :class:`str` so that values are directly comparable to + plain strings (``parser.query_type == "SELECT"``). Returned by + :attr:`Parser.query_type` and by :class:`_query_type.QueryTypeExtractor`. 
""" INSERT = "INSERT" @@ -84,11 +102,16 @@ class QueryType(str, Enum): ALTER = "ALTER TABLE" DROP = "DROP TABLE" TRUNCATE = "TRUNCATE TABLE" + MERGE = "MERGE" class TokenType(str, Enum): - """ - Types of SQLTokens + """Semantic classification assigned to an :class:`SQLToken` during parsing. + + These types are used by the legacy token-based extraction pipeline to + label each token after the keyword-driven classification pass. In the + v3 sqlglot-based pipeline they are still referenced for backward + compatibility in test assertions and token introspection. """ COLUMN = "COLUMN" @@ -100,7 +123,10 @@ class TokenType(str, Enum): PARENTHESIS = "PARENTHESIS" -# cannot fully replace with enum as with/select has the same key +#: Maps normalised query-prefix strings to :class:`QueryType` values. +#: Cannot be replaced by the enum alone because ``WITH`` maps to +#: ``SELECT`` (a CTE followed by its main query) and composite prefixes +#: like ``CREATETABLE`` need their own entries. SUPPORTED_QUERY_TYPES = { "INSERT": QueryType.INSERT, "REPLACE": QueryType.REPLACE, @@ -116,8 +142,10 @@ class TokenType(str, Enum): "TRUNCATETABLE": QueryType.TRUNCATE, } -# all the keywords we care for - rest is ignored in assigning -# the last keyword +#: Union of all keyword sets the tokeniser cares about. Tokens whose +#: normalised value falls outside this set are **not** tracked as the +#: ``last_keyword`` on subsequent tokens, keeping the classification +#: logic focused on structurally significant positions only. RELEVANT_KEYWORDS = { *KEYWORDS_BEFORE_COLUMNS, *TABLE_ADJUSTMENT_KEYWORDS, diff --git a/sql_metadata/nested_resolver.py b/sql_metadata/nested_resolver.py new file mode 100644 index 00000000..b43c57e0 --- /dev/null +++ b/sql_metadata/nested_resolver.py @@ -0,0 +1,658 @@ +"""Nested column resolution and CTE/subquery body extraction. 
+ +The :class:`NestedResolver` class owns the complete "look inside nested +queries" concern: rendering CTE/subquery AST nodes back to SQL, parsing +those bodies with sub-:class:`Parser` instances, and resolving +``subquery.column`` references to actual columns. +""" + +from __future__ import annotations + +import copy +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from sql_metadata.parser import Parser + +from sqlglot import exp +from sqlglot.generator import Generator + +from sql_metadata.utils import ( + UniqueList, + last_segment, +) + +# --------------------------------------------------------------------------- +# Custom SQL generator — preserves function signatures +# --------------------------------------------------------------------------- + + +class _PreservingGenerator(Generator): + """Custom SQL generator that preserves function signatures. + + sqlglot normalises certain functions when rendering SQL (e.g. + ``IFNULL`` → ``COALESCE``, ``DIV`` → ``CAST(… / … AS INT)``). + This generator overrides those transformations so that the output + only differs from the input in keyword/function-name casing and + explicit ``AS`` insertion. 
+ """ + + TRANSFORMS = { + **Generator.TRANSFORMS, + exp.CurrentDate: lambda self, e: "CURRENT_DATE()", + exp.IntDiv: lambda self, e: ( + f"{self.sql(e, 'this')} DIV {self.sql(e, 'expression')}" + ), + } + + def coalesce_sql(self, expression: exp.Expression) -> str: + args = [expression.this] + expression.expressions + if len(args) == 2: + return f"IFNULL({self.sql(args[0])}, {self.sql(args[1])})" + args_sql = ", ".join(self.sql(a) for a in args) + return f"COALESCE({args_sql})" + + def dateadd_sql(self, expression: exp.Expression) -> str: + return ( + f"DATE_ADD({self.sql(expression, 'this')}, " + f"{self.sql(expression, 'expression')})" + ) + + def datesub_sql(self, expression: exp.Expression) -> str: + return ( + f"DATE_SUB({self.sql(expression, 'this')}, " + f"{self.sql(expression, 'expression')})" + ) + + def tsordsadd_sql(self, expression: exp.Expression) -> str: + this = self.sql(expression, "this") + expr_node = expression.expression + if isinstance(expr_node, exp.Mul): + right = expr_node.expression + if ( + isinstance(right, exp.Neg) + and isinstance(right.this, exp.Literal) + and right.this.this == "1" + ): + left = self.sql(expr_node, "this") + return f"DATE_SUB({this}, {left})" + return f"DATE_ADD({this}, {self.sql(expression, 'expression')})" + + def not_sql(self, expression: exp.Expression) -> str: + child = expression.this + if isinstance(child, exp.Is) and isinstance(child.expression, exp.Null): + return f"{self.sql(child, 'this')} IS NOT NULL" + if isinstance(child, exp.In): + return f"{self.sql(child, 'this')} NOT IN ({self.expressions(child)})" + return super().not_sql(expression) # type: ignore[arg-type, no-any-return] + + +_GENERATOR = _PreservingGenerator() + + +# --------------------------------------------------------------------------- +# Resolution helpers +# --------------------------------------------------------------------------- + + +def _is_qualified_reference(result: list[str]) -> bool: + """Check if result is a single dotted 
reference like ``['cte.col']``.""" + return len(result) == 1 and "." in result[0] + + +def _is_not_already_resolved_qualified_reference( + result: list[str], column: str +) -> bool: + """Check if result is a qualified reference that changed from the input.""" + return _is_qualified_reference(result) and result != [column] + + +# --------------------------------------------------------------------------- +# NestedResolver class +# --------------------------------------------------------------------------- + + +class NestedResolver: + """Resolve column references through subqueries and CTEs. + + Owns the complete lifecycle of nested query resolution: + + 1. **Body extraction** — render CTE/subquery AST nodes back to SQL + via :class:`_PreservingGenerator`. + 2. **Column resolution** — parse bodies with sub-Parsers and resolve + ``subquery.column`` references to actual columns. + 3. **Unqualified alias resolution** — detect column names that are actually + aliases defined inside nested queries. + + :param ast: Root AST node (for body extraction). + """ + + def __init__(self, ast: exp.Expression): + self._ast = ast + + # Lazy caches + self._subqueries_parsers: dict[str, "Parser"] = {} + self._with_parsers: dict[str, "Parser"] = {} + self._columns_aliases: dict[str, str | list[str]] = {} + self._cached_cte_nodes: list[exp.CTE] | None = None + + # Set by resolve() caller + self._subqueries_names: list[str] = [] + self._subqueries: dict[str, str] = {} + self._with_names: list[str] = [] + self._with_queries: dict[str, str] = {} + + # ------------------------------------------------------------------- + # Public API — name extraction + # ------------------------------------------------------------------- + + def extract_cte_names( + self, + cte_name_map: dict[str, str], + ) -> list[str]: + """Extract CTE names from the AST. + + Called by :attr:`Parser.with_names`. + + :param cte_name_map: Mapping of placeholder names to original + qualified names, e.g. 
``{"db__DOT__cte": "db.cte"}``. + Built by :func:`SqlCleaner._normalize_cte_names` because + sqlglot cannot parse dots in CTE names — they get rewritten + to placeholders before parsing. This map restores the + original names in the output. + :returns: List of CTE names, e.g. ``["db.cte", "sales"]``. + """ + return UniqueList([ + cte_name_map.get(cte.alias, cte.alias) for cte in self._cte_nodes() + ]) + + def extract_cte_bodies( + self, + cte_name_map: dict[str, str], + ) -> dict[str, str]: + """Extract CTE body SQL for each CTE in the AST. + + :param cte_name_map: Placeholder-to-original mapping, e.g. + ``{"db__DOT__cte": "db.cte"}``. See :meth:`extract_cte_names` + for details. + :returns: Mapping of ``{cte_name: body_sql}``, + e.g. ``{"db.cte": "SELECT id FROM t"}``. + """ + results: dict[str, str] = {} + for cte in self._cte_nodes(): + alias = cte.alias + original_name = cte_name_map.get(alias, alias) + results[original_name] = self._body_sql(cte.this) + + return results + + @staticmethod + def extract_subqueries( + ast: exp.Expression, + ) -> tuple[list[str], dict[str, str]]: + """Extract subquery names and bodies in a single post-order walk. + + Aliased subqueries keep their alias as the name. Unaliased + subqueries (e.g. ``WHERE id IN (SELECT …)``) get auto-generated + names ``subquery_1``, ``subquery_2``, etc. + + Example SQL:: + + SELECT * FROM (SELECT id FROM t) AS sub + WHERE id IN (SELECT id FROM t2) + + :returns: ``(names, bodies)`` where *names* is ordered innermost-first, + e.g. ``(["subquery_1", "sub"], {...})``. 
+ """ + names: list[str] = UniqueList() + bodies: dict[str, str] = {} + NestedResolver._walk_subqueries(ast, names, bodies, 0) + return names, bodies + + # ------------------------------------------------------------------- + # Public API — column resolution + # ------------------------------------------------------------------- + + def resolve( + self, + columns: "UniqueList", + columns_dict: dict[str, UniqueList], + columns_aliases: dict[str, str | list[str]], + subqueries_names: list[str], + subqueries: dict[str, str], + with_names: list[str], + with_queries: dict[str, str], + ) -> tuple[UniqueList, dict[str, UniqueList], dict[str, str | list[str]]]: + """Resolve columns that reference subqueries or CTEs. + + Two-phase resolution: + + 1. Replace ``subquery.column`` references with the actual column + from the subquery/CTE definition. + 2. Drop unqualified column names that are actually aliases defined + inside a nested query. + + Also applies the same resolution to *columns_dict*. + + Example SQL:: + + WITH cte AS (SELECT a FROM t) + SELECT cte.a FROM cte + + :returns: Tuple of ``(columns, columns_dict, columns_aliases)``. + """ + self._subqueries_names = subqueries_names + self._subqueries = subqueries + self._with_names = with_names + self._with_queries = with_queries + self._columns_aliases = columns_aliases + + # For columns drop aliases as we need only actual columns + columns = self._resolve_and_filter(columns, drop_unqualified_aliases=True) + + if columns_dict: + # For columns_dict do not drop aliases but instead resolve them to columns. + # That ensures the column is present in all the relevant sections regardless + # if it's called directly or by alias i.e. SELECT a AS x FROM tbl ORDER BY x + # the column a should appear both in select and order_by sections. 
+ for section, cols in list(columns_dict.items()): + columns_dict[section] = self._resolve_and_filter( + cols, drop_unqualified_aliases=False + ) + + return columns, columns_dict, self._columns_aliases + + def resolve_column_alias( + self, alias: str | list[str], columns_aliases: dict[str, str | list[str]] + ) -> list[str]: + """Public interface for alias resolution (used by parser.py). + + Example SQL:: + + SELECT a AS x FROM t ORDER BY x + + Resolves ``"x"`` → ``"a"`` using the alias map. + """ + return self._resolve_column_alias(alias, columns_aliases) + + # ------------------------------------------------------------------- + # Resolution pipeline — callers before callees + # ------------------------------------------------------------------- + + def _resolve_and_filter( + self, columns: "UniqueList", drop_unqualified_aliases: bool = True + ) -> "UniqueList": + """Apply subquery/CTE resolution and unqualified-alias handling. + + Phase 1: resolve ``sub.col`` references via :meth:`_resolve_sub_queries`. + Phase 2: detect unqualified names that are nested-query aliases. + + Example SQL:: + + SELECT sub.id FROM (SELECT id FROM users) AS sub + + Phase 1 resolves ``sub.id`` → ``id``. + Phase 2 checks if ``id`` is a nested alias (it is not, so it stays). + """ + resolved: list[str] = UniqueList() + for col in columns: + resolved.extend(self._resolve_sub_queries(col)) + + final = UniqueList() + for col in resolved: + if "." in col: + # e.g. schema.col — skip unqualified alias resolution + final.append(col) + continue + new_cols = self._resolve_unqualified_through_nested(col) + if new_cols != [col]: + # e.g. SELECT x FROM (SELECT a AS x FROM t) AS sub + # — "x" resolved to "a", drop the alias from columns + if not drop_unqualified_aliases: + final.extend(new_cols) + continue + # e.g. 
SELECT id FROM t — no alias match, keep as-is + final.append(col) + return final + + def _resolve_sub_queries(self, column: str) -> list[str]: + """Resolve a ``subquery.column`` reference to actual column(s). + + Tries subquery sources first, then CTE sources. + + Example SQL:: + + SELECT sub.id FROM (SELECT id FROM users) AS sub + + Resolves ``"sub.id"`` → ``["id"]``. + """ + result: list[str] = [column] + for names, defs, cache in self._nested_sources(): + if _is_qualified_reference(result): + # e.g. "sub.id" — still a qualified reference, try next source + result = self._resolve_nested_query( + subquery_alias=result[0], + nested_queries_names=names, + nested_queries=defs, + already_parsed=cache, + ) + # Recursively resolve chained CTE references: c3.a → c2.a → c1.a → a + if _is_not_already_resolved_qualified_reference(result, column): + return self._resolve_sub_queries(result[0]) + return result + + def _resolve_unqualified_through_nested( + self, col_name: str + ) -> list[str]: + """Resolve an unqualified column name through subquery/CTE alias definitions. + + Checks subquery aliases first (``check_columns=True``), then CTE + aliases (``check_columns=False``). + + Example SQL:: + + SELECT x FROM (SELECT a AS x FROM users) AS sub + + Resolves ``"x"`` → ``["a"]`` (found as alias in subquery body). + """ + for i, (names, defs, cache) in enumerate(self._nested_sources()): + # check_columns for subqueries only — prevents CTE aliases + # from claiming subquery columns, e.g. in: + # WITH cte AS (SELECT x AS name FROM t1) + # SELECT name FROM (SELECT name FROM t2) AS sub + # "name" is a real column in sub, not the CTE alias. 
def _resolve_unqualified_through_nested(self, col_name: str) -> list[str]:
    """Resolve an unqualified column through subquery/CTE alias definitions.

    Subquery aliases are consulted first (with ``check_columns=True``),
    CTE aliases second.

    Example SQL::

        SELECT x FROM (SELECT a AS x FROM users) AS sub

    Resolves ``"x"`` to ``["a"]`` (alias found in the subquery body).
    """
    for source_index, (names, definitions, cache) in enumerate(
        self._nested_sources()
    ):
        # check_columns only for the subquery source — stops CTE aliases
        # from claiming columns that really exist in a subquery, e.g.:
        #   WITH cte AS (SELECT x AS name FROM t1)
        #   SELECT name FROM (SELECT name FROM t2) AS sub
        # where "name" is a genuine column of sub, not the CTE alias.
        found = self._lookup_alias_in_nested(
            col_name, names, definitions, cache, check_columns=source_index == 0
        )
        if found is not None:
            return found
    return [col_name]


def _lookup_alias_in_nested(
    self,
    col_name: str,
    names: list[str],
    definitions: dict[str, str],
    parser_cache: dict[str, "Parser"],
    check_columns: bool = False,
) -> list[str] | None:
    """Search nested query bodies for *col_name* as a column alias.

    Each nested query is parsed (and cached) and three outcomes exist:

    1. **Alias match** — resolved to the underlying column(s)::

           WITH cte AS (SELECT a AS x FROM t) SELECT x FROM cte
           -- "x" → ["a"]; multi-column aliases (a + b AS y) also work

    2. **Direct column match** (only when ``check_columns`` is set) —
       the name is a real column of the nested query, kept as-is.

    3. **No match** — returns ``None`` so the caller can try other
       sources or keep the column unchanged.
    """
    from sql_metadata.parser import Parser

    for nested_name in names:
        nested_parser = parser_cache.setdefault(
            nested_name, Parser(definitions[nested_name])
        )
        if col_name in nested_parser.columns_aliases_names:
            # Outcome 1: follow the whole alias chain, e.g.
            # SELECT col1 AS a …, SELECT a AS x … — "x" → ["col1"]
            resolved = self._resolve_column_alias(
                col_name, nested_parser.columns_aliases
            )
            if self._columns_aliases is not None:
                # Record only the one-step mapping (x → a, not x → col1)
                # so the outer columns_aliases mirrors the SQL as written.
                self._columns_aliases[col_name] = nested_parser.columns_aliases.get(
                    col_name, resolved
                )
            return resolved
        if check_columns and col_name in nested_parser.columns:
            # Outcome 2: real column of the subquery
            return [col_name]
    # Outcome 3: not found anywhere
    return None


@staticmethod
def _resolve_nested_query(
    subquery_alias: str,
    nested_queries_names: list[str],
    nested_queries: dict[str, str],
    already_parsed: dict[str, "Parser"],
) -> list[str]:
    """Resolve a ``prefix.column`` reference through a nested query.

    Splits on ``"."``; when the prefix is a known nested-query name the
    query body is parsed and the column resolved inside it.

    Example SQL::

        SELECT sub.id FROM (SELECT id FROM users) AS sub

    ``"sub.id"`` → prefix ``"sub"`` matches → returns ``["id"]``.
    """
    from sql_metadata.parser import Parser

    pieces = subquery_alias.split(".")
    if len(pieces) != 2 or pieces[0] not in nested_queries_names:
        # e.g. "table.col" or "schema.table.col" — not a subquery reference
        return [subquery_alias]
    query_name, column_name = pieces[0], pieces[-1]
    subparser = already_parsed.setdefault(
        query_name, Parser(nested_queries[query_name])
    )
    return NestedResolver._resolve_column_in_subparser(
        column_name, subparser, subquery_alias
    )
+ + Three resolution paths: + + 1. Column name is a known alias in the subparser → resolve it. + 2. Column name is ``*`` → return all subparser columns. + 3. Otherwise → fall back to positional/wildcard matching. + + Example SQL (path 1 — alias):: + + SELECT sub.x FROM (SELECT a AS x FROM t) AS sub + + ``"x"`` is an alias → resolves to ``["a"]``. + + Example SQL (path 2 — star):: + + SELECT sub.* FROM (SELECT a, b FROM t) AS sub + + ``"*"`` → returns ``["a", "b"]``. + """ + if column_name in subparser.columns_aliases_names: + # e.g. sub.x where x is aliased to a → resolve alias chain + return subparser._resolve_column_alias(column_name) + if column_name == "*": + # e.g. sub.* → return all columns from subquery + return subparser.columns + return NestedResolver._find_column_fallback( + column_name, subparser, original_ref + ) + + @staticmethod + def _find_column_fallback( + column_name: str, subparser: "Parser", original_ref: str + ) -> list[str]: + """Find a column by name in the subparser with wildcard fallbacks. + + Tries to match *column_name* against the last segment of each + subparser column. If no match is found, checks for wildcard + columns (``*`` or ``table.*``) before giving up. + + Example SQL (positional match):: + + SELECT sub.id FROM (SELECT users.id FROM users) AS sub + + ``"id"`` matches ``"users.id"`` by last segment → ``["users.id"]``. + + Example SQL (wildcard fallback):: + + SELECT sub.id FROM (SELECT * FROM users) AS sub + + ``"id"`` not found, but subparser has ``*`` → returns ``["id"]``. + """ + try: + idx = [last_segment(x) for x in subparser.columns].index(column_name) + except ValueError: + if "*" in subparser.columns: + # e.g. SELECT * FROM t — subquery selects everything + return [column_name] + for table in subparser.tables: + if f"{table}.*" in subparser.columns: + # e.g. SELECT t.* FROM t — table-qualified wildcard + return [column_name] + # e.g. column not found in subquery at all — keep original ref + return [original_ref] + # e.g. 
"id" matched at position idx → return fully-qualified form + return [subparser.columns[idx]] + + # ------------------------------------------------------------------- + # Alias resolution + # ------------------------------------------------------------------- + + def _resolve_column_alias( + self, + alias: str | list[str], + columns_aliases: dict[str, str | list[str]], + visited: set[str] | None = None, + ) -> list[str]: + """Recursively resolve a column alias to its underlying column(s). + + Follows alias chains until a non-alias column is reached. + Tracks visited aliases to prevent infinite loops on circular + definitions. + + Example SQL:: + + WITH cte AS (SELECT a AS x FROM t) SELECT x AS y FROM cte + + Resolving ``"y"`` → ``"x"`` → ``["a"]``. + """ + visited = visited or set() + if isinstance(alias, list): + # e.g. alias mapped to multiple columns — resolve each + return [ + item + for x in alias + for item in self._resolve_column_alias(x, columns_aliases, visited) + ] + while alias in columns_aliases and alias not in visited: + visited.add(alias) + alias = columns_aliases[alias] + if isinstance(alias, list): + # e.g. alias mapped to [col1, col2] — resolve list recursively + return self._resolve_column_alias(alias, columns_aliases, visited) + return [alias] + + # ------------------------------------------------------------------- + # Shared helpers + # ------------------------------------------------------------------- + + def _nested_sources( + self, + ) -> list[tuple[list[str], dict[str, str], dict[str, "Parser"]]]: + """Return the (names, defs, cache) tuples for subqueries then CTEs. + + Subqueries are checked first because they are more specific than + CTEs — a column reference ``sub.col`` should resolve against the + subquery named ``sub`` before falling back to a CTE with the + same name. 
+ """ + return [ + (self._subqueries_names, self._subqueries, self._subqueries_parsers), + (self._with_names, self._with_queries, self._with_parsers), + ] + + def _cte_nodes(self) -> list[exp.CTE]: + """Return all ``exp.CTE`` nodes from the AST (cached). + + Example SQL:: + + WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM a, b + + Returns two ``exp.CTE`` nodes (for ``a`` and ``b``). + """ + if self._cached_cte_nodes is None: + self._cached_cte_nodes = list(self._ast.find_all(exp.CTE)) + return self._cached_cte_nodes + + # ------------------------------------------------------------------- + # Body extraction helpers + # ------------------------------------------------------------------- + + @staticmethod + def _body_sql(node: exp.Expression) -> str: + """Render an AST node to SQL, stripping identifier quoting. + + Example SQL:: + + WITH cte AS (SELECT "id" FROM "users") ... + + Renders the CTE body as ``SELECT id FROM users`` (quotes stripped). + """ + body = copy.deepcopy(node) + for ident in body.find_all(exp.Identifier): + ident.set("quoted", False) + return _GENERATOR.generate(body) + + @staticmethod + def _walk_subqueries( + node: exp.Expression, + names: list[str], + bodies: dict[str, str], + counter: int, + ) -> int: + """Post-order walk collecting subquery names and bodies. + + Returns the updated *counter* so unnamed subqueries are numbered + sequentially. + + Example SQL:: + + SELECT * FROM (SELECT 1) AS named, (SELECT 2) + + Produces names ``["named", "subquery_1"]`` with corresponding bodies. + """ + for child in node.iter_expressions(): + counter = NestedResolver._walk_subqueries( + child, names, bodies, counter + ) + if isinstance(node, exp.Subquery): + if node.alias: + # e.g. (SELECT 1) AS named — use the explicit alias + name = node.alias + else: + # e.g. 
WHERE id IN (SELECT 1) — auto-generate name + counter += 1 + name = f"subquery_{counter}" + names.append(name) + bodies[name] = NestedResolver._body_sql(node.this) + return counter diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index 122075c8..43fe8239 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -1,34 +1,45 @@ -# pylint: disable=C0302 -""" -This module provides SQL query parsing functions +"""SQL query parsing facade. + +Thin facade that composes the specialised extractors via lazy properties: + +* :class:`~ast_parser.ASTParser` — AST construction and dialect detection. +* :class:`~column_extractor.ColumnExtractor` — single-pass column/alias extraction. +* :class:`~table_extractor.TableExtractor` — table extraction with position sorting. +* :class:`~nested_resolver.NestedResolver` — CTE/subquery name and body extraction, + nested column resolution. +* :mod:`query_type_extractor` — query type detection. +* :mod:`comments` — comment extraction. """ import logging import re -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Any -import sqlparse -from sqlparse.sql import Token -from sqlparse.tokens import Name, Number, Whitespace +from sqlglot import exp +from sql_metadata.ast_parser import ASTParser +from sql_metadata.column_extractor import ColumnExtractor +from sql_metadata.comments import extract_comments, strip_comments from sql_metadata.generalizator import Generalizator -from sql_metadata.keywords_lists import ( - COLUMNS_SECTIONS, - KEYWORDS_BEFORE_COLUMNS, - TokenType, - RELEVANT_KEYWORDS, - SUBQUERY_PRECEDING_KEYWORDS, - SUPPORTED_QUERY_TYPES, - TABLE_ADJUSTMENT_KEYWORDS, - WITH_ENDING_KEYWORDS, -) -from sql_metadata.token import EmptyToken, SQLToken -from sql_metadata.utils import UniqueList, flatten_list - - -class Parser: # pylint: disable=R0902 - """ - Main class to parse sql query +from sql_metadata.keywords_lists import QueryType +from sql_metadata.nested_resolver import NestedResolver 
+from sql_metadata.query_type_extractor import QueryTypeExtractor +from sql_metadata.table_extractor import TableExtractor +from sql_metadata.utils import UniqueList + + +class Parser: + """Parse a SQL query and extract metadata. + + The primary public interface of the ``sql-metadata`` library. Given a + raw SQL string, the parser lazily extracts tables, columns, aliases, + CTE definitions, subqueries, values, comments, and more — each + available as a cached property. + + :param sql: The SQL query string to parse. + :type sql: str + :param disable_logging: If ``True``, suppress all log output. + :type disable_logging: bool """ def __init__(self, sql: str = "", disable_logging: bool = False) -> None: @@ -36,1091 +47,499 @@ def __init__(self, sql: str = "", disable_logging: bool = False) -> None: self._logger.disabled = disable_logging self._raw_query = sql - self._query = self._preprocess_query() - self._query_type = None - - self._tokens = None - - self._columns = None - self._columns_dict = None - self._columns_aliases_names = None - self._columns_aliases = None - self._columns_with_tables_aliases = {} - self._columns_aliases_dict = None - - self._tables = None - self._table_aliases = None - - self._with_names = None - self._with_queries = None - self._with_queries_columns = None - self._subqueries = None - self._subqueries_names = None - self._subqueries_parsers = {} - self._with_parsers = {} - - self._limit_and_offset = None - - self._values = None - self._values_dict = None - - self._subquery_level = 0 - self._nested_level = 0 - self._parenthesis_level = 0 - self._open_parentheses: List[SQLToken] = [] - self._preceded_keywords: List[SQLToken] = [] - self._aliases_to_check = None - self._is_in_nested_function = False - self._is_in_with_block = False - self._with_columns_candidates = {} - self._column_aliases_max_subquery_level = {} - - self.sqlparse_tokens = None - self.non_empty_tokens = None - self.tokens_length = None + self._query_type: str | None = None + 
def _get_resolver(self) -> "NestedResolver":
    """Return (and cache) the :class:`NestedResolver` for this query.

    :raises AssertionError: if the AST is ``None`` — callers reach this
        only after a successful parse, so a ``None`` AST is a bug.
    """
    if self._resolver is None:
        ast = self._ast_parser.ast
        assert ast is not None
        self._resolver = NestedResolver(ast)
    return self._resolver


@property
def query(self) -> str:
    """Return the preprocessed SQL query as a single line.

    Newlines are flattened to spaces, then the double spaces that this
    flattening can produce are collapsed to single spaces.
    """
    # BUG FIX: the second replace targeted a single space (" " -> " "),
    # a no-op; it must collapse the double spaces ("  " -> " ") left by
    # replacing "\n" with " " on lines with leading whitespace.
    return self._preprocess_query().replace("\n", " ").replace("  ", " ")
re.Match[str]) -> str: + return re.sub('"', "", match.group()) + + def replace_back_quotes_in_string(match: re.Match[str]) -> str: + return re.sub("", '"', match.group()) + + query = re.sub(r"'.*?'", replace_quotes_in_string, self._raw_query) + query = re.sub(r'"([^`]+?)"', r"`\1`", query) + query = re.sub(r"'.*?'", replace_back_quotes_in_string, query) + return query + + # ------------------------------------------------------------------- + # Query type + # ------------------------------------------------------------------- @property def query_type(self) -> str: - """ - Returns type of the query. - Currently supported queries are: - select, insert, update, replace, create table, alter table, with + select - """ + """Return the type of the SQL query.""" if self._query_type: return self._query_type - if not self._tokens: - _ = self.tokens - - # remove comment tokens to not confuse the logic below (see #163) - tokens: List[SQLToken] = list( - filter(lambda token: not token.is_comment, self._tokens or []) - ) - - if not tokens: - raise ValueError("Empty queries are not supported!") - - index = ( - 0 - if not tokens[0].is_left_parenthesis - else tokens[0] - .find_nearest_token( - value=False, value_attribute="is_left_parenthesis", direction="right" - ) - .position - ) - if tokens[index].normalized == "CREATE": - switch = self._get_switch_by_create_query(tokens, index) - elif tokens[index].normalized in ("ALTER", "DROP", "TRUNCATE"): - switch = tokens[index].normalized + tokens[index + 1].normalized - else: - switch = tokens[index].normalized - self._query_type = SUPPORTED_QUERY_TYPES.get(switch, "UNSUPPORTED") - if self._query_type == "UNSUPPORTED": - # do not log the full query - # https://github.com/macbre/sql-metadata/issues/543 - shorten_query = " ".join(self._raw_query.split(" ")[:3]) - - self._logger.error("Not supported query type: %s", shorten_query) - raise ValueError("Not supported query type!") + try: + ast = self._ast_parser.ast + except ValueError: + ast 
@property
def query_type(self) -> str:
    """Return the type of the SQL query (SELECT, INSERT, …).

    A failed parse yields a ``None`` AST, which the extractor handles;
    INSERTs produced by a REPLACE statement are reported as REPLACE.
    """
    if self._query_type:
        return self._query_type
    try:
        ast = self._ast_parser.ast
    except ValueError:
        ast = None
    detected = QueryTypeExtractor(ast, self._raw_query).extract()
    if detected == QueryType.INSERT and self._ast_parser.is_replace:
        detected = QueryType.REPLACE
    self._query_type = detected
    return self._query_type


@property
def tokens(self) -> list[str]:
    """Return the SQL as a list of token strings.

    Backtick and double-quote wrapping is stripped from each token;
    empty/whitespace-only queries tokenize to ``[]``.
    """
    if self._tokens is not None:
        return self._tokens
    if not self._raw_query or not self._raw_query.strip():
        self._tokens = []
        return self._tokens
    from sql_metadata.comments import _choose_tokenizer

    try:
        tokenizer = _choose_tokenizer(self._raw_query)
        raw_tokens = list(tokenizer.tokenize(self._raw_query))
    # TODO: revisit if sqlglot tokenizer starts raising on specific inputs
    except Exception:  # pragma: no cover
        raw_tokens = []
    self._tokens = [tok.text.strip("`").strip('"') for tok in raw_tokens]
    return self._tokens
@property
def columns(self) -> list[str]:
    """Return the list of column names referenced in the query.

    Populates every column-related cache (sections, aliases, output
    columns, subqueries) as a side effect. Falls back to regex-based
    extraction when the query cannot be parsed into an AST.
    """
    if self._columns is not None:
        return self._columns
    try:
        ast = self._ast_parser.ast
        tables_aliases = self.tables_aliases
    except ValueError:
        # Unparseable query — regex fallback, no section/alias data.
        self._set_empty_column_results(
            UniqueList(self._extract_columns_regex())
        )
        return self._columns

    if ast is None:  # pragma: no cover — tables_aliases raises for None ast
        self._set_empty_column_results(UniqueList())
        return self._columns

    extractor = ColumnExtractor(ast, tables_aliases, self._ast_parser.cte_name_map)
    result = extractor.extract()

    self._columns = result.columns
    self._columns_dict = result.columns_dict
    self._columns_aliases_names = result.alias_names
    self._columns_aliases_dict = result.alias_dict
    self._columns_aliases = result.alias_map if result.alias_map else {}
    self._output_columns = result.output_columns

    # Use only aliased subquery names for column resolution —
    # auto-generated names (subquery_1, …) are never referenced in SQL.
    aliased_names = result.subquery_names
    all_names, all_bodies = NestedResolver.extract_subqueries(ast)
    aliased_bodies = {k: v for k, v in all_bodies.items() if k in aliased_names}
    resolver = self._get_resolver()
    self._columns, self._columns_dict, self._columns_aliases = resolver.resolve(
        self._columns,
        self._columns_dict,
        self._columns_aliases,
        aliased_names,
        aliased_bodies,
        self.with_names,
        self.with_queries,
    )
    # Cache full results for the public subqueries properties
    self._subqueries_names = all_names
    self._subqueries = all_bodies
    return self._columns


def _set_empty_column_results(self, columns: "UniqueList") -> None:
    """Populate all column caches for the no-AST fallback paths.

    Extracted helper: the two fallback branches in :attr:`columns`
    previously duplicated these six assignments verbatim.
    """
    self._columns = columns
    self._columns_dict = {}
    self._columns_aliases_names = UniqueList()
    self._columns_aliases_dict = {}
    self._columns_aliases = {}
    self._output_columns = []


@property
def columns_dict(self) -> dict[str, "UniqueList"]:
    """Return column names organised by query section.

    Aliases used in non-select sections are resolved to their underlying
    columns, so e.g. ``SELECT a AS x FROM t ORDER BY x`` reports ``a``
    under both ``select`` and ``order_by``.
    """
    if self._columns_dict is None:
        _ = self.columns  # populates the section caches
    assert self._columns_dict is not None
    if self.columns_aliases_dict:
        resolver = self._get_resolver()
        for section, aliases in self.columns_aliases_dict.items():
            for alias in aliases:
                resolved = resolver.resolve_column_alias(
                    alias, self.columns_aliases
                )
                for column in resolved:
                    self._columns_dict.setdefault(section, UniqueList()).append(
                        column
                    )
    return self._columns_dict
list(self._columns_with_tables_aliases.keys()) - + self.columns_aliases_names - + ["*"] - ) - for token in self.tokens: - if token.is_potential_column_alias( - column_aliases=column_aliases, - columns_aliases_names=self.columns_aliases_names, - ): - token_check = ( - token.previous_token - if not token.previous_token.is_as_keyword - else token.get_nth_previous(2) - ) - if token_check.is_column_definition_end: - alias_of = self._resolve_subquery_alias(token=token) - elif token_check.is_partition_clause_end: - start_token = token.find_nearest_token( - True, value_attribute="is_partition_clause_start" - ) - alias_of = self._find_all_columns_between_tokens( - start_token=start_token, end_token=token - ) - elif token.is_in_with_columns: - # columns definition is to the right in subquery - # we are in: with with_name () as (subquery) - alias_of = self._find_column_for_with_column_alias(token) - else: - alias_of = self._resolve_function_alias(token=token) - if token.value != alias_of: - # skip aliases of self, like sum(column) as column - column_aliases[token.value] = alias_of - - self._columns_aliases = column_aliases + def columns_aliases(self) -> dict[str, str | list[str]]: + """Return the alias-to-column mapping for column aliases.""" + if self._columns_aliases is None: + _ = self.columns + assert self._columns_aliases is not None return self._columns_aliases @property - def columns_aliases_dict(self) -> Dict[str, List[str]]: - """ - Returns dictionary of column names divided into section of the query in which - given column is present. 
- - Sections consist of: select, where, order_by, group_by, join, insert and update - """ - if self._columns_aliases_dict: - return self._columns_aliases_dict - _ = self.columns_aliases_names + def columns_aliases_dict(self) -> dict[str, UniqueList] | None: + """Return column alias names organised by query section.""" + if self._columns_aliases_dict is None: + _ = self.columns return self._columns_aliases_dict @property - def columns_aliases_names(self) -> List[str]: - """ - Extract names of the column aliases used in query - """ - if self._columns_aliases_names is not None: - return self._columns_aliases_names - column_aliases_names = UniqueList() - with_names = self.with_names - subqueries_names = self.subqueries_names - for token in self._not_parsed_tokens: - if token.is_potential_alias: - if token.value in column_aliases_names: - self._handle_column_alias_subquery_level_update(token=token) - elif ( - token.is_a_valid_alias - and token.value not in with_names + subqueries_names - ): - column_aliases_names.append(token.value) - self._handle_column_alias_subquery_level_update(token=token) - - self._columns_aliases_names = column_aliases_names + def columns_aliases_names(self) -> list[str]: + """Return the names of all column aliases used in the query.""" + if self._columns_aliases_names is None: + _ = self.columns + assert self._columns_aliases_names is not None return self._columns_aliases_names @property - def tables(self) -> List[str]: - """ - Return the list of tables this query refers to - """ - if self._tables is not None: - return self._tables - tables = UniqueList() - with_names = self.with_names - - for token in self._not_parsed_tokens: - if token.is_potential_table_name: - if ( - token.is_alias_of_table_or_alias_of_subquery - or token.is_with_statement_nested_in_subquery - or token.is_constraint_definition_inside_create_table_clause( - query_type=self.query_type - ) - or token.is_columns_alias_of_with_query_or_column_in_insert_query( - 
@property
def output_columns(self) -> list[str]:
    """Return the ordered list of SELECT output column names.

    Real columns and aliases appear in their original position, e.g.
    ``SELECT a, b AS c FROM t`` returns ``["a", "c"]``.
    """
    if self._output_columns is None:
        _ = self.columns  # populates the output-column cache
    assert self._output_columns is not None
    return self._output_columns


@property
def tables(self) -> list[str]:
    """Return the list of table names referenced in the query.

    CTE names (including placeholder names from the AST parser's CTE
    name map) are excluded from the result.
    """
    if self._tables is not None:
        return self._tables
    _ = self.query_type  # raises for unsupported queries before extraction
    ast = self._ast_parser.ast
    assert ast is not None  # guaranteed by query_type raising on None
    cte_names = set(self.with_names) | set(self._ast_parser.cte_name_map)
    self._tables = TableExtractor(
        ast,
        self._raw_query,
        cte_names,
        dialect=self._ast_parser.dialect,
    ).extract()
    return self._tables


@property
def tables_aliases(self) -> dict[str, str]:
    """Return the table alias mapping for this query.

    E.g. ``SELECT a.* FROM users1 AS a JOIN users2 AS b ON …`` gives
    ``{"a": "users1", "b": "users2"}``.
    """
    if self._table_aliases is not None:
        return self._table_aliases
    ast = self._ast_parser.ast
    assert ast is not None  # guaranteed by prior tables/query_type access
    self._table_aliases = TableExtractor(ast).extract_aliases(self.tables)
    return self._table_aliases
- potential_table_name = token.previous_token.value - - if potential_table_name in tables: - token.token_type = TokenType.TABLE_ALIAS - aliases[token.value] = potential_table_name - - self._table_aliases = aliases + ast = self._ast_parser.ast + assert ast is not None # guaranteed by prior tables/query_type access + extractor = TableExtractor(ast) + self._table_aliases = extractor.extract_aliases(self.tables) return self._table_aliases - @property - def with_names(self) -> List[str]: # noqa: C901 - """ - Returns with statements aliases list from a given query + # ------------------------------------------------------------------- + # CTEs and subqueries + # ------------------------------------------------------------------- - E.g. WITH database1.tableFromWith AS (SELECT * FROM table3) - SELECT "xxxxx" FROM database1.tableFromWith alias - LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx") - will return ["database1.tableFromWith"] - """ + @property + def with_names(self) -> list[str]: + """Return the CTE (Common Table Expression) names from the query.""" if self._with_names is not None: return self._with_names - with_names = UniqueList() - for token in self._not_parsed_tokens: - if token.previous_token.normalized == "WITH": - self._is_in_with_block = True - while self._is_in_with_block and token.next_token: - if token.next_token.is_as_keyword: - self._handle_with_name_save(token=token, with_names=with_names) - while token.next_token and not token.is_with_query_end: - token = token.next_token - is_end_of_with_block = ( - token.next_token_not_comment is None - or token.next_token_not_comment.normalized - in WITH_ENDING_KEYWORDS - ) - if is_end_of_with_block: - self._is_in_with_block = False - elif token.next_token and token.next_token.is_as_keyword: - # Malformed SQL like "... AS (...) AS ..." 
- raise ValueError("This query is wrong") - else: - # Advance token to prevent infinite loop - token = token.next_token - else: - token = token.next_token - - self._with_names = with_names + resolver = self._get_resolver() + self._with_names = resolver.extract_cte_names( + self._ast_parser.cte_name_map + ) return self._with_names @property - def with_queries(self) -> Dict[str, str]: - """ - Returns "WITH" subqueries with names - - E.g. WITH tableFromWith AS (SELECT * FROM table3) - SELECT "xxxxx" FROM database1.tableFromWith alias - LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx") - will return {"tableFromWith": "SELECT * FROM table3"} - """ + def with_queries(self) -> dict[str, str]: + """Return the SQL body for each CTE defined in the query.""" if self._with_queries is not None: return self._with_queries - with_queries = {} - with_queries_columns = {} - for name in self.with_names: - token = self.tokens[0].find_nearest_token( - name, value_attribute="value", direction="right" - ) - if token.next_token.is_with_columns_start: - with_queries_columns[name] = True - else: - with_queries_columns[name] = False - current_with_query = [] - with_start = token.find_nearest_token( - True, value_attribute="is_with_query_start", direction="right" - ) - with_end = with_start.find_nearest_token( - True, value_attribute="is_with_query_end", direction="right" - ) - query_token = with_start.next_token - while query_token is not None and query_token != with_end: - current_with_query.append(query_token) - query_token = query_token.next_token - with_query_text = "".join([x.stringified_token for x in current_with_query]) - with_queries[name] = with_query_text - self._with_queries = with_queries - self._with_queries_columns = with_queries_columns + resolver = self._get_resolver() + self._with_queries = resolver.extract_cte_bodies( + self._ast_parser.cte_name_map + ) return self._with_queries @property - def subqueries(self) -> Dict: - """ - Returns a dictionary with all 
sub-queries existing in query - """ + def subqueries(self) -> dict[str, str]: + """Return the SQL body for each subquery in the query.""" if self._subqueries is not None: return self._subqueries - subqueries = {} - token = self.tokens[0] - while token.next_token: - if token.previous_token.is_subquery_start: - current_subquery = [] - current_level = token.subquery_level - inner_token = token - while ( - inner_token.next_token - and not inner_token.next_token.subquery_level < current_level - ): - current_subquery.append(inner_token) - inner_token = inner_token.next_token - - query_name = None - if inner_token.next_token.value in self.subqueries_names: - query_name = inner_token.next_token.value - elif inner_token.next_token.is_as_keyword: - query_name = inner_token.next_token.next_token.value - - subquery_text = "".join([x.stringified_token for x in current_subquery]) - if query_name is not None: - subqueries[query_name] = subquery_text - - token = token.next_token - - self._subqueries = subqueries + ast = self._ast_parser.ast + assert ast is not None + self._subqueries_names, self._subqueries = ( + NestedResolver.extract_subqueries(ast) + ) return self._subqueries @property - def subqueries_names(self) -> List[str]: - """ - Returns sub-queries aliases list from a given query + def subqueries_names(self) -> list[str]: + """Return the names of all subqueries (innermost first). - e.g. SELECT COUNT(1) FROM - (SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1) a - JOIN (SELECT st.task_id FROM some_task st WHERE task_type_id = 80) b - ON a.task_id = b.task_id; - will return ["a", "b"] + Aliased subqueries use their alias; unaliased ones get + auto-generated names (``subquery_1``, ``subquery_2``, …). 
""" if self._subqueries_names is not None: return self._subqueries_names - subqueries_names = UniqueList() - for token in self.tokens: - if (token.previous_token.is_subquery_end and not token.is_as_keyword) or ( - token.previous_token.is_as_keyword - and token.get_nth_previous(2).is_subquery_end - ): - token.token_type = TokenType.SUB_QUERY_NAME - subqueries_names.append(str(token)) - - self._subqueries_names = subqueries_names + ast = self._ast_parser.ast + assert ast is not None + self._subqueries_names, self._subqueries = ( + NestedResolver.extract_subqueries(ast) + ) return self._subqueries_names + # ------------------------------------------------------------------- + # Limit, offset, values + # ------------------------------------------------------------------- + + @staticmethod + def _extract_int_from_node(node: Any) -> int | None: + """Safely extract an integer value from a Limit or Offset node.""" + if not node: + return None + try: + return int(node.expression.this) + except (ValueError, AttributeError, TypeError): + return None + @property - def values(self) -> List: - """ - Returns list of values from insert queries - """ + def limit_and_offset(self) -> tuple[int, int] | None: + """Return the LIMIT and OFFSET values, if present.""" + if self._limit_and_offset is not None: + return self._limit_and_offset + + from sqlglot import exp + + ast = self._ast_parser.ast + if ast is None: + return None + + select = ast if isinstance(ast, exp.Select) else ast.find(exp.Select) + if select is None: + return None + + limit_val = self._extract_int_from_node(select.args.get("limit")) + offset_val = self._extract_int_from_node(select.args.get("offset")) + + if limit_val is None: + return self._extract_limit_regex() + + self._limit_and_offset = limit_val, offset_val or 0 + return self._limit_and_offset + + @property + def values(self) -> list[Any]: + """Return the list of literal values from INSERT/REPLACE queries.""" if self._values: return self._values - values = [] - 
for token in self._not_parsed_tokens: - if ( - token.last_keyword_normalized == "VALUES" - and token.is_in_parenthesis - and token.next_token.is_punctuation - ): - if token.is_integer: - value = int(token.value) - elif token.is_float: - value = float(token.value) - else: - value = token.value.strip("'\"") - values.append(value) - self._values = values + self._values = self._extract_values() return self._values @property - def values_dict(self) -> Dict: - """ - Returns dictionary of column-value pairs. - If columns are not set the auto generated column_ are added. - """ + def values_dict(self) -> dict[str, Any] | None: + """Return column-value pairs from INSERT/REPLACE queries.""" values = self.values if self._values_dict or not values: return self._values_dict - columns = self.columns + try: + columns = self.columns + # TODO: revisit if .columns starts propagating ValueError to callers + except ValueError: # pragma: no cover + columns = [] + + is_multi = values and isinstance(values[0], list) + first_row = values[0] if is_multi else values if not columns: - columns = [f"column_{ind + 1}" for ind in range(len(values))] - values_dict = dict(zip(columns, values)) - self._values_dict = values_dict + columns = [f"column_{ind + 1}" for ind in range(len(first_row))] + + if is_multi: + self._values_dict = { + col: [row[i] for row in values] for i, col in enumerate(columns) + } + else: + self._values_dict = dict(zip(columns, values)) return self._values_dict + # ------------------------------------------------------------------- + # Comments and generalization + # ------------------------------------------------------------------- + @property - def comments(self) -> List[str]: - """ - Return comments from SQL query - """ - return [x.value for x in self.tokens if x.is_comment] + def comments(self) -> list[str]: + """Return all comments from the SQL query.""" + return extract_comments(self._raw_query) @property def without_comments(self) -> str: - """ - Removes comments from 
SQL query - """ - return Generalizator(self._raw_query).without_comments + """Return the SQL with all comments removed.""" + return strip_comments(self._raw_query) @property def generalize(self) -> str: - """ - Removes most variables from an SQL query - and replaces them with X or N for numbers. - - Based on Mediawiki's DatabaseBase::generalizeSQL - """ + """Return a generalised (anonymised) version of the query.""" return Generalizator(self._raw_query).generalize - @property - def _not_parsed_tokens(self): - """ - Returns only tokens that have no type assigned yet - """ - return [x for x in self.tokens if x.token_type is None] - - def _handle_column_save(self, token: SQLToken, columns: List[str]): - column = token.table_prefixed_column(self.tables_aliases) - if self._is_with_query_already_resolved(column): - self._add_to_columns_aliases_subsection(token=token, left_expand=False) - token.token_type = TokenType.COLUMN_ALIAS - return - column = self._resolve_sub_queries(column) - self._add_to_columns_with_tables(token, column) - self._add_to_columns_subsection( - keyword=token.last_keyword_normalized, column=column - ) - token.token_type = TokenType.COLUMN - columns.extend(column) - - @staticmethod - def _handle_with_name_save(token: SQLToken, with_names: List[str]) -> None: - if token.is_right_parenthesis: - # inside columns of with statement - # like: with (col1, col2) as (subquery) - token.is_with_columns_end = True - token.is_nested_function_end = False - start_token = token.find_nearest_token("(") - # like: with (col1, col2) as (subquery) as ..., it enters an infinite loop. 
- # return exception - if start_token.is_with_query_start: - raise ValueError("This query is wrong") # pragma: no cover - start_token.is_with_columns_start = True - start_token.is_nested_function_start = False - prev_token = start_token.previous_token - prev_token.token_type = TokenType.WITH_NAME - with_names.append(prev_token.value) - else: - token.token_type = TokenType.WITH_NAME - with_names.append(token.value) - - def _handle_column_alias_subquery_level_update(self, token: SQLToken) -> None: - token.token_type = TokenType.COLUMN_ALIAS - self._add_to_columns_aliases_subsection(token=token) - current_level = self._column_aliases_max_subquery_level.setdefault( - token.value, 0 - ) - if token.subquery_level > current_level: - self._column_aliases_max_subquery_level[token.value] = token.subquery_level - - def _resolve_subquery_alias(self, token: SQLToken) -> Union[str, List[str]]: - # nested subquery like select a, (select a as b from x) as column - start_token = token.find_nearest_token( - True, value_attribute="is_column_definition_start" - ) - if start_token.next_token.normalized == "SELECT": - # we have a subquery - alias_token = start_token.next_token.find_nearest_token( - self._aliases_to_check, - direction="right", - value_attribute="value", - ) - return self._resolve_alias_to_column(alias_token) + # ------------------------------------------------------------------- + # Internal extraction helpers + # ------------------------------------------------------------------- - # chain of functions or redundant parenthesis - return self._find_all_columns_between_tokens( - start_token=start_token, end_token=token - ) + def _extract_values(self) -> list[Any]: + """Extract literal values from INSERT/REPLACE query AST.""" + from sqlglot import exp - def _resolve_function_alias(self, token: SQLToken) -> Union[str, List[str]]: - # it can be one function or a chain of functions - # like: sum(a) + sum(b) as alias - # or operation on columns like: col1 + col2 as alias - 
start_token = token.find_nearest_token( - [",", "SELECT"], value_attribute="normalized" - ) - while start_token.is_in_nested_function: - start_token = start_token.find_nearest_token( - [",", "SELECT"], value_attribute="normalized" - ) - return self._find_all_columns_between_tokens( - start_token=start_token, end_token=token - ) - - def _add_to_columns_subsection(self, keyword: str, column: Union[str, List[str]]): - """ - Add columns to the section in which it appears in query - """ - section = COLUMNS_SECTIONS[keyword] - self._columns_dict = self._columns_dict or {} - current_section = self._columns_dict.setdefault(section, UniqueList()) - if isinstance(column, str): - current_section.append(column) - else: - current_section.extend(column) - - def _add_to_columns_aliases_subsection( - self, token: SQLToken, left_expand: bool = True - ) -> None: - """ - Add alias to the section in which it appears in query - """ - keyword = token.last_keyword_normalized - alias = token.value if left_expand else token.value.split(".")[-1] - if ( - token.last_keyword_normalized in ["FROM", "WITH"] - and token.find_nearest_token("(").is_with_columns_start - ): - keyword = "SELECT" - section = COLUMNS_SECTIONS[keyword] - self._columns_aliases_dict = self._columns_aliases_dict or {} - self._columns_aliases_dict.setdefault(section, UniqueList()).append(alias) - - def _add_to_columns_with_tables( - self, token: SQLToken, column: Union[str, List[str]] - ) -> None: - if isinstance(column, list) and len(column) == 1: - column = column[0] - self._columns_with_tables_aliases[token.value] = column - - def _resolve_column_alias( - self, alias: Union[str, List[str]], visited: Set = None - ) -> Union[str, List]: - """ - Returns a column name for a given alias - """ - visited = visited or set() - if isinstance(alias, list): - return [self._resolve_column_alias(x, visited) for x in alias] - while alias in self.columns_aliases and alias not in visited: - visited.add(alias) - alias = 
self.columns_aliases[alias] - if isinstance(alias, list): - return self._resolve_column_alias(alias, visited) - return alias - - def _resolve_alias_to_column(self, alias_token: SQLToken) -> str: - """ - Resolves aliases of tables to already resolved columns - """ - if alias_token.value in self._columns_with_tables_aliases: - alias_of = self._columns_with_tables_aliases[alias_token.value] - else: - alias_of = alias_token.value - return alias_of - - def _resolve_sub_queries(self, column: str) -> List[str]: - """ - Resolve column names coming from sub queries and with queries to actual - column names as they appear in the query - """ - column = self._resolve_nested_query( - subquery_alias=column, - nested_queries_names=self.subqueries_names, - nested_queries=self.subqueries, - already_parsed=self._subqueries_parsers, - ) - if isinstance(column, str): - column = self._resolve_nested_query( - subquery_alias=column, - nested_queries_names=self.with_names, - nested_queries=self.with_queries, - already_parsed=self._with_parsers, - ) - return column if isinstance(column, list) else [column] + try: + ast = self._ast_parser.ast + except ValueError: + return [] + + if ast is None: + return [] + + values_node = ast.find(exp.Values) + if not values_node: + return [] + + rows = [] + for tup in values_node.expressions: + if isinstance(tup, exp.Tuple): + rows.append([self._convert_value(val) for val in tup.expressions]) + # TODO: revisit if sqlglot stops wrapping VALUES items in Tuple + else: # pragma: no cover + rows.append([self._convert_value(tup)]) + if len(rows) == 1: + return rows[0] + return rows @staticmethod - # pylint:disable=too-many-return-statements - def _resolve_nested_query( # noqa: C901 - subquery_alias: str, - nested_queries_names: List[str], - nested_queries: Dict, - already_parsed: Dict, - ) -> Union[str, List[str]]: - """ - Resolves subquery reference to the actual column in the subquery - """ - parts = subquery_alias.split(".") - if len(parts) != 2 or parts[0] 
not in nested_queries_names: - return subquery_alias - sub_query, column_name = parts[0], parts[-1] - sub_query_definition = nested_queries.get(sub_query) - subparser = already_parsed.setdefault(sub_query, Parser(sub_query_definition)) - # in subquery you cannot have more than one column with given name - # so it either has to have an alias or only one column with given name exists - if column_name in subparser.columns_aliases_names: - resolved_column = subparser._resolve_column_alias( # pylint: disable=W0212 - column_name - ) - if isinstance(resolved_column, list): - resolved_column = flatten_list(resolved_column) - return resolved_column - return [resolved_column] + def _convert_value(val: exp.Expression) -> int | float | str: + """Convert a sqlglot literal AST node to a Python type.""" + from sqlglot import exp + + if isinstance(val, exp.Literal): + if val.is_int: + return int(val.this) + if val.is_number: + return float(val.this) + return str(val.this) + if isinstance(val, exp.Neg): + inner = val.this + if isinstance(inner, exp.Literal): + if inner.is_int: + return -int(inner.this) + return -float(inner.this) + return str(val) + + def _extract_limit_regex(self) -> tuple[int, int] | None: + """Extract LIMIT and OFFSET using regex as a fallback.""" + sql = strip_comments(self._raw_query) + match = re.search(r"LIMIT\s+(\d+)\s*,\s*(\d+)", sql, re.IGNORECASE) + if match: + offset_val = int(match.group(1)) + limit_val = int(match.group(2)) + self._limit_and_offset = limit_val, offset_val + return self._limit_and_offset - if column_name == "*": - return subparser.columns - try: - column_index = [x.split(".")[-1] for x in subparser.columns].index( - column_name - ) - except ValueError as exc: - # handle case when column name is used but subquery select all by wildcard - if "*" in subparser.columns: - return column_name - for table in subparser.tables: - if f"{table}.*" in subparser.columns: - return column_name - raise exc # pragma: no cover - resolved_column = 
subparser.columns[column_index] - return [resolved_column] - - def _is_with_query_already_resolved(self, col_alias: str) -> bool: - """ - Checks if columns comes from a with query that has columns defined - cause if it does that means that column name is an alias and is already - resolved in aliases. - """ - parts = col_alias.split(".") - if len(parts) != 2 or parts[0] not in self.with_names: - return False - if self._with_queries_columns.get(parts[0]): - return True - return False - - def _determine_opening_parenthesis_type(self, token: SQLToken): - """ - Determines the type of left parenthesis in query - """ - if token.previous_token.normalized in SUBQUERY_PRECEDING_KEYWORDS: - # inside subquery / derived table - token.is_subquery_start = True - self._subquery_level += 1 - self._preceded_keywords.append(token.last_keyword_normalized) - token.subquery_level = self._subquery_level - elif token.previous_token.normalized in KEYWORDS_BEFORE_COLUMNS.union({","}): - # we are in columns and in a column subquery definition - token.is_column_definition_start = True - elif ( - token.previous_token_not_comment.is_as_keyword - and token.last_keyword_normalized != "WINDOW" - ): - # window clause also contains AS keyword, but it is not a query - token.is_with_query_start = True - elif ( - token.last_keyword_normalized == "TABLE" - and token.find_nearest_token("(") is EmptyToken - ): - token.is_create_table_columns_declaration_start = True - elif token.previous_token.normalized == "OVER": - token.is_partition_clause_start = True - else: - # nested function - token.is_nested_function_start = True - self._nested_level += 1 - self._is_in_nested_function = True - self._open_parentheses.append(token) - self._parenthesis_level += 1 - - def _determine_closing_parenthesis_type(self, token: SQLToken): - """ - Determines the type of right parenthesis in query - """ - last_open_parenthesis = self._open_parentheses.pop(-1) - if last_open_parenthesis.is_subquery_start: - 
token.is_subquery_end = True - self._subquery_level -= 1 - elif last_open_parenthesis.is_column_definition_start: - token.is_column_definition_end = True - elif last_open_parenthesis.is_with_query_start: - token.is_with_query_end = True - elif last_open_parenthesis.is_create_table_columns_declaration_start: - token.is_create_table_columns_declaration_end = True - elif last_open_parenthesis.is_partition_clause_start: - token.is_partition_clause_end = True - else: - token.is_nested_function_end = True - self._nested_level -= 1 - if self._nested_level == 0: - self._is_in_nested_function = False - self._parenthesis_level -= 1 - - def _find_column_for_with_column_alias(self, token: SQLToken) -> str: - start_token = token.find_nearest_token( - True, direction="right", value_attribute="is_with_query_start" + match = re.search( + r"LIMIT\s+(\d+)(?:\s+OFFSET\s+(\d+))?", + sql, + re.IGNORECASE, ) - if start_token not in self._with_columns_candidates: - end_token = start_token.find_nearest_token( - True, direction="right", value_attribute="is_with_query_end" - ) - columns = self._find_all_columns_between_tokens( - start_token=start_token, end_token=end_token - ) - self._with_columns_candidates[start_token] = columns - if isinstance(self._with_columns_candidates[start_token], list): - alias_of = self._with_columns_candidates[start_token].pop(0) - else: - alias_of = self._with_columns_candidates[start_token] - return alias_of - - def _find_all_columns_between_tokens( - self, start_token: SQLToken, end_token: SQLToken - ) -> Union[str, List[str]]: - """ - Returns a list of columns between two tokens - """ - loop_token = start_token - aliases = UniqueList() - while loop_token.next_token != end_token: - if loop_token.next_token.value in self._aliases_to_check: - alias_token = loop_token.next_token - if ( - alias_token.normalized != "*" - or alias_token.is_wildcard_not_operator - ): - aliases.append(self._resolve_alias_to_column(alias_token)) - loop_token = loop_token.next_token - 
return aliases[0] if len(aliases) == 1 else aliases - - def _preprocess_query(self) -> str: - """ - Perform initial query cleanup - """ - if self._raw_query == "": - return "" - - # python re does not have variable length look back/forward - # so we need to replace all the " (double quote) for a - # temporary placeholder as we DO NOT want to replace those - # in the strings as this is something that user provided - def replace_quotes_in_string(match): - return re.sub('"', "", match.group()) - - def replace_back_quotes_in_string(match): - return re.sub("", '"', match.group()) - - # unify quoting in queries, replace double quotes to backticks - # it's best to keep the quotes as they can have keywords - # or digits at the beginning so we only strip them in SQLToken - # as double quotes are not properly handled in sqlparse - query = re.sub(r"'.*?'", replace_quotes_in_string, self._raw_query) - query = re.sub(r'"([^`]+?)"', r"`\1`", query) - query = re.sub(r"'.*?'", replace_back_quotes_in_string, query) - - return query - - def _determine_last_relevant_keyword(self, token: SQLToken, last_keyword: str): - if token.value == "," and token.last_keyword_normalized == "ON": - return "FROM" - if token.is_keyword and "".join(token.normalized.split()) in RELEVANT_KEYWORDS: - if ( - not ( - token.normalized == "FROM" - and token.get_nth_previous(3).normalized == "EXTRACT" - ) - and not ( - token.normalized == "ORDERBY" - and len(self._open_parentheses) > 0 - and self._open_parentheses[-1].is_partition_clause_start - ) - and not (token.normalized == "USING" and last_keyword == "SELECT") - and not (token.normalized == "IFNOTEXISTS") - ): - last_keyword = token.normalized - return last_keyword - - def _is_token_part_of_complex_identifier( - self, token: sqlparse.tokens.Token, index: int - ) -> bool: - """ - Checks if token is a part of complex identifier like - .
. or
. - """ - if token.is_keyword: - return False - return str(token) == "." or ( - index + 1 < self.tokens_length - and str(self.non_empty_tokens[index + 1]) == "." + if match: + limit_val = int(match.group(1)) + offset_val = int(match.group(2)) if match.group(2) else 0 + self._limit_and_offset = limit_val, offset_val + return self._limit_and_offset + return None + + def _extract_columns_regex(self) -> list[str]: + """Extract column names from ``INTO ... (col1, col2)`` using regex.""" + match = re.search( + r"INTO\s+\S+\s*\(([^)]+)\)", + self._raw_query, + re.IGNORECASE, ) - - def _combine_qualified_names(self, index: int, token: SQLToken) -> None: - """ - Combines names like .
. or
. - """ - value = token.value - is_complex = True - while is_complex: - value, is_complex = self._combine_tokens(index=index, value=value) - index = index - 1 - token.value = value - - def _combine_tokens(self, index: int, value: str) -> Tuple[str, bool]: - """ - Checks if complex identifier is longer and follows back until it's finished - """ - if index > 1: - prev_value = self.non_empty_tokens[index - 1] - if not self._is_token_part_of_complex_identifier(prev_value, index - 1): - return value, False - prev_value = str(prev_value).strip("`") - value = f"{prev_value}{value}" - return value, True - return value, False - - def _get_sqlparse_tokens(self, parsed) -> None: - """ - Flattens the tokens and removes whitespace - """ - self.sqlparse_tokens = parsed[0].tokens - sqlparse_tokens = self._flatten_sqlparse() - self.non_empty_tokens = [ - token - for token in sqlparse_tokens - if token.ttype is not Whitespace and token.ttype.parent is not Whitespace - ] - self.tokens_length = len(self.non_empty_tokens) - - def _flatten_sqlparse(self): - for token in self.sqlparse_tokens: - # sqlparse returns mysql digit starting identifiers as group - # check https://github.com/andialbrecht/sqlparse/issues/337 - is_grouped_mysql_digit_name = ( - token.is_group - and len(token.tokens) == 2 - and token.tokens[0].ttype is Number.Integer - and ( - token.tokens[1].is_group and token.tokens[1].tokens[0].ttype is Name - ) - ) - if token.is_group and not is_grouped_mysql_digit_name: - yield from token.flatten() - elif is_grouped_mysql_digit_name: - # we have digit starting name - new_tok = Token( - value=f"{token.tokens[0].normalized}" - f"{token.tokens[1].tokens[0].normalized}", - ttype=token.tokens[1].tokens[0].ttype, - ) - new_tok.parent = token.parent - yield new_tok - if len(token.tokens[1].tokens) > 1: - # unfortunately there might be nested groups - remaining_tokens = token.tokens[1].tokens[1:] - for tok in remaining_tokens: - if tok.is_group: - yield from tok.flatten() - else: - 
yield tok - else: - yield token - - @staticmethod - def _get_switch_by_create_query(tokens: List[SQLToken], index: int) -> str: - """ - Return the switch that creates query type. - """ - switch = tokens[index].normalized + tokens[index + 1].normalized - - # Hive CREATE FUNCTION - if any( - index + i < len(tokens) and tokens[index + i].normalized == "FUNCTION" - for i in (1, 2) - ): - switch = "CREATEFUNCTION" - - return switch - - @staticmethod - def _parse(sql: str) -> Tuple[sqlparse.sql.Statement]: - """ - Parse the SQL query using sqlparse library - """ - return sqlparse.parse(sql) + if not match: + return [] + cols = [] + for col in match.group(1).split(","): + col = col.strip().strip("`").strip('"').strip("'") + if col: + cols.append(col) + return cols + + def _resolve_column_alias(self, alias: str | list[str]) -> list[str]: + """Recursively resolve a column alias (delegates to NestedResolver).""" + resolver = self._get_resolver() + return resolver.resolve_column_alias(alias, self.columns_aliases) diff --git a/sql_metadata/py.typed b/sql_metadata/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/sql_metadata/query_type_extractor.py b/sql_metadata/query_type_extractor.py new file mode 100644 index 00000000..6d13ff5a --- /dev/null +++ b/sql_metadata/query_type_extractor.py @@ -0,0 +1,114 @@ +"""Extract the query type from a sqlglot AST root node. + +The :class:`QueryTypeExtractor` class maps the top-level AST node to a +:class:`QueryType` enum value, handling parenthesised wrappers, set +operations, and opaque ``Command`` nodes. +""" + +import logging +from typing import NoReturn + +from sqlglot import exp + +from sql_metadata.exceptions import InvalidQueryDefinition +from sql_metadata.keywords_lists import QueryType + +logger = logging.getLogger(__name__) + + +#: Direct AST type → QueryType mapping for simple cases. 
+_SIMPLE_TYPE_MAP = { + exp.Select: QueryType.SELECT, + exp.Union: QueryType.SELECT, + exp.Intersect: QueryType.SELECT, + exp.Except: QueryType.SELECT, + exp.Insert: QueryType.INSERT, + exp.Update: QueryType.UPDATE, + exp.Delete: QueryType.DELETE, + exp.Create: QueryType.CREATE, + exp.Alter: QueryType.ALTER, + exp.Drop: QueryType.DROP, + exp.TruncateTable: QueryType.TRUNCATE, + exp.Merge: QueryType.MERGE, +} + + +class QueryTypeExtractor: + """Determine the query type from a sqlglot AST root node. + + :param ast: Root AST node (may be ``None``). + :param raw_query: Original SQL string (for error messages). + """ + + def __init__( + self, + ast: exp.Expression | None, + raw_query: str, + ): + self._ast = ast + self._raw_query = raw_query + + def extract(self) -> QueryType: + """Determine the :class:`QueryType` for the parsed SQL. + + :returns: The detected query type. + :raises ValueError: If the query is empty, malformed, or + unsupported. + """ + if self._ast is None: + self._raise_for_none_ast() + + root = self._unwrap_parens(self._ast) + node_type = type(root) + + if node_type is exp.With: + raise InvalidQueryDefinition( + "WITH clause without a main statement is not valid SQL" + ) + + simple = _SIMPLE_TYPE_MAP.get(node_type) + if simple is not None: + return simple + + if node_type is exp.Command: + result = self._resolve_command_type(root) + if result is not None: + return result + + shorten_query = " ".join(self._raw_query.split(" ")[:3]) + logger.error("Not supported query type: %s", shorten_query) + raise InvalidQueryDefinition("Not supported query type!") + + @staticmethod + def _unwrap_parens(ast: exp.Expression) -> exp.Expression: + """Remove Paren and Subquery wrappers to reach the real statement.""" + # TODO: revisit if sqlglot stops stripping outer parens before this is called + if isinstance(ast, (exp.Paren, exp.Subquery)): # pragma: no cover + return QueryTypeExtractor._unwrap_parens(ast.this) + return ast + + @staticmethod + def 
_resolve_command_type(root: exp.Expression) -> QueryType | None: + """Determine query type for an opaque ``exp.Command`` node. + + Hive ``CREATE FUNCTION ... USING JAR ... WITH SERDEPROPERTIES`` + is not supported by any sqlglot dialect and degrades to + ``exp.Command(this='CREATE', ...)``. This fallback extracts + the query type from the command text so callers still get + ``QueryType.CREATE``. + """ + expression_text = str(root.this).upper() if root.this else "" + if expression_text == "CREATE": + return QueryType.CREATE + return None + + def _raise_for_none_ast(self) -> "NoReturn": + """Raise an appropriate error when the AST is None.""" + from sql_metadata.comments import strip_comments + + stripped = strip_comments(self._raw_query) if self._raw_query else "" + if stripped.strip(): + raise InvalidQueryDefinition( + "Could not parse the query — the SQL syntax appears to be invalid" + ) + raise InvalidQueryDefinition("Empty queries are not supported!") diff --git a/sql_metadata/sql_cleaner.py b/sql_metadata/sql_cleaner.py new file mode 100644 index 00000000..fd1dbe72 --- /dev/null +++ b/sql_metadata/sql_cleaner.py @@ -0,0 +1,180 @@ +"""Raw SQL preprocessing before AST construction. + +Pure string transformations — no sqlglot dependency. Handles comment +stripping, ``REPLACE INTO`` rewriting, qualified CTE name normalisation, +DB2 isolation-level clauses, malformed-query rejection, and redundant +outer-parenthesis removal. +""" + +import itertools +import re +from typing import NamedTuple + +from sql_metadata.comments import strip_comments_for_parsing as _strip_comments +from sql_metadata.exceptions import InvalidQueryDefinition +from sql_metadata.utils import DOT_PLACEHOLDER + + +class CleanResult(NamedTuple): + """Result of :meth:`SqlCleaner.clean`.""" + + sql: str | None + is_replace: bool + cte_name_map: dict[str, str] + + +def _strip_outer_parens(sql: str) -> str: + """Strip redundant outer parentheses from *sql*. 
+ + Needed because sqlglot cannot parse double-wrapped non-SELECT + statements like ``((UPDATE ...))``. Uses ``itertools.accumulate`` + to verify balanced parens in one pass, with recursion for nesting. + """ + s = sql.strip() + + def _is_wrapped(text: str) -> bool: + if len(text) < 2 or text[0] != "(" or text[-1] != ")": + return False + inner = text[1:-1] + depths = list( + itertools.accumulate( + (1 if c == "(" else -1 if c == ")" else 0) for c in inner + ) + ) + return not depths or min(depths) >= 0 + + if _is_wrapped(s): + return _strip_outer_parens(s[1:-1].strip()) + return s + + +def _normalize_cte_names(sql: str) -> tuple[str, dict[str, str]]: + """Replace qualified CTE names with simple placeholders. + + sqlglot cannot parse ``WITH db.cte_name AS (...)`` because it + interprets ``db.cte_name`` as a table reference. This function + rewrites such names to ``db__DOT__cte_name`` and returns a mapping + so that the original qualified names can be restored after extraction. + + :param sql: SQL string that may contain qualified CTE names. + :type sql: str + :returns: A 2-tuple of ``(modified_sql, {placeholder: original_name})``. + :rtype: tuple + """ + name_map = {} + # Find WITH ... 
AS patterns with qualified names + pattern = re.compile( + r"(\bWITH\s+|,\s*)(\w+\.\w+)(\s+AS\s*\()", + re.IGNORECASE, + ) + + def replacer(match: re.Match[str]) -> str: + prefix = match.group(1) + qualified_name = match.group(2) + suffix = match.group(3) + placeholder = qualified_name.replace(".", DOT_PLACEHOLDER) + name_map[placeholder] = qualified_name + return f"{prefix}{placeholder}{suffix}" + + modified = pattern.sub(replacer, sql) + + # Also replace references to qualified CTE names in FROM/JOIN clauses + for placeholder, original in name_map.items(): + # Replace references but not the definition (already replaced) + # Use word boundary to avoid partial matches + modified = re.sub( + r"\b" + re.escape(original) + r"\b", + placeholder, + modified, + ) + + return modified, name_map + + +class SqlCleaner: + """Preprocess raw SQL strings before dialect parsing.""" + + @staticmethod + def clean(sql: str) -> CleanResult: + """Apply all preprocessing steps to raw SQL. + + Steps (in order): + + 1. Rewrite ``REPLACE INTO`` → ``INSERT INTO``. + 2. Rewrite ``SELECT...INTO var FROM`` → ``SELECT...FROM``. + 3. Strip comments. + 4. Normalise qualified CTE names. + 5. Strip DB2 isolation-level clauses. + 6. Detect malformed ``WITH...AS(...) AS`` patterns. + 7. Strip redundant outer parentheses. + + :param sql: Raw SQL string. + :type sql: str + :returns: Cleaning result with preprocessed SQL (``None`` if + effectively empty), replace flag, and CTE name map. + :rtype: CleanResult + :raises ValueError: If a malformed WITH pattern is detected. + """ + is_replace = False + if re.match(r"\s*REPLACE\b", sql, re.IGNORECASE): + sql = re.sub( + r"\bREPLACE\s+INTO\b", + "INSERT INTO", + sql, + count=1, + flags=re.IGNORECASE, + ) + is_replace = True + + # Rewrite SELECT...INTO var1,var2 FROM → SELECT...FROM + # so sqlglot doesn't treat variables as tables. 
+ sql = re.sub( + r"(?i)(\bSELECT\b.+?)\bINTO\b.+?\bFROM\b", + r"\1FROM", + sql, + count=1, + flags=re.DOTALL, + ) + + clean_sql = _strip_comments(sql) + if not clean_sql.strip(): + return CleanResult(sql=None, is_replace=is_replace, cte_name_map={}) + + clean_sql, cte_name_map = _normalize_cte_names(clean_sql) + clean_sql = re.sub( + r"\bwith\s+(ur|cs|rs|rr)\s*$", "", clean_sql, flags=re.IGNORECASE + ).strip() + + SqlCleaner._detect_malformed_with(clean_sql) + + clean_sql = _strip_outer_parens(clean_sql) + if not clean_sql.strip(): + return CleanResult( + sql=None, is_replace=is_replace, cte_name_map=cte_name_map + ) + + return CleanResult( + sql=clean_sql, is_replace=is_replace, cte_name_map=cte_name_map + ) + + @staticmethod + def _detect_malformed_with(clean_sql: str) -> None: + """Raise ``ValueError`` if the SQL contains a malformed WITH pattern. + + Detects ``WITH...AS(...) AS `` or + ``WITH...AS(...) AS `` — an extra ``AS`` token + after the CTE body that indicates malformed SQL. + + :param clean_sql: Preprocessed SQL string. + :type clean_sql: str + :raises ValueError: If a malformed WITH pattern is found. + """ + if not re.match(r"\s*WITH\b", clean_sql, re.IGNORECASE): + return + main_kw = r"(?:SELECT|INSERT|UPDATE|DELETE)" + if re.search( + r"\)\s+AS\s+" + main_kw + r"\b", clean_sql, re.IGNORECASE + ) or re.search(r"\)\s+AS\s+\w+\s+" + main_kw + r"\b", clean_sql, re.IGNORECASE): + raise InvalidQueryDefinition( + "Malformed WITH clause — extra AS keyword after CTE body" + ) diff --git a/sql_metadata/table_extractor.py b/sql_metadata/table_extractor.py new file mode 100644 index 00000000..04321536 --- /dev/null +++ b/sql_metadata/table_extractor.py @@ -0,0 +1,461 @@ +"""Extract tables and table aliases from a sqlglot AST. 
+ +The :class:`TableExtractor` class walks the AST for ``exp.Table`` and +``exp.Lateral`` nodes, builds fully-qualified table names (optionally +preserving ``[bracket]`` notation for TSQL), and sorts results by their +first occurrence in the raw SQL so the output order matches left-to-right +reading order. CTE names are excluded from the result so that only *real* +tables are reported. +""" + +import re + +from sqlglot import exp +from sqlglot.dialects.dialect import DialectType + +from sql_metadata.utils import UniqueList + +# --------------------------------------------------------------------------- +# Pure static helpers (no instance state needed) +# --------------------------------------------------------------------------- + + +def _assemble_dotted_name( + catalog: str, db: str, name: str, *, preserve_empty: bool = False +) -> str: + """Assemble a dot-joined table name from catalog, db, and name parts. + + When *preserve_empty* is ``True``, empty segments are kept so that + double-dot notation (e.g. ``server..table``) is preserved. + + .. code-block:: sql + + -- preserve_empty=False (default) + SELECT * FROM mydb.dbo.users -- → "mydb.dbo.users" + -- preserve_empty=True + SELECT * FROM server..users -- → "server..users" + + :param catalog: Catalog / server segment (may be empty). + :param db: Database / schema segment (may be empty). + :param name: Table name segment. + :param preserve_empty: Keep empty segments for double-dot notation. + :returns: Dot-joined name string. + """ + return ".".join( + part for part in [catalog, db, name] if part or preserve_empty + ) + + +def _ident_str(node: exp.Identifier) -> str: + """Return an identifier string, wrapping it in ``[brackets]`` if quoted. + + TSQL uses square brackets for quoting — this helper preserves that + notation so the output matches the original SQL style. + + .. 
code-block:: sql + + SELECT * FROM [dbo].[Users] -- → "[dbo]", "[Users]" + SELECT * FROM dbo.Users -- → "dbo", "Users" + + :param node: An ``exp.Identifier`` AST node. + :returns: The identifier text, optionally bracket-wrapped. + """ + return f"[{node.name}]" if node.quoted else node.name + + +def _collect_node_parts(node: object, parts: list[str]) -> None: + """Append identifier strings from *node* into *parts*. + + Handles both simple ``exp.Identifier`` nodes and ``exp.Dot`` nodes + that contain two identifiers (e.g. ``schema.table``). + + :param node: An AST node — either ``exp.Identifier`` or ``exp.Dot``. + :param parts: Accumulator list to append identifier strings into. + """ + if isinstance(node, exp.Identifier): + # e.g. SELECT * FROM [Users] — single identifier + parts.append(_ident_str(node)) + elif isinstance(node, exp.Dot): + # e.g. SELECT * FROM [dbo].[Users] — dotted pair + for sub in [node.this, node.expression]: + if isinstance(sub, exp.Identifier): + parts.append(_ident_str(sub)) + + +def _bracketed_full_name(table: exp.Table) -> str: + """Build a table name preserving ``[bracket]`` notation from AST nodes. + + Walks the ``catalog``, ``db``, and ``this`` args of an ``exp.Table`` + node, collecting bracket-preserved identifier parts. + + .. code-block:: sql + + SELECT * FROM [mydb].[dbo].[Users] -- → "[mydb].[dbo].[Users]" + SELECT * FROM [Users] -- → "[Users]" + + :param table: An ``exp.Table`` AST node. + :returns: Dot-joined bracket-preserved name, or ``""`` if no parts found. + """ + parts: list[str] = [] + for key in ["catalog", "db", "this"]: + node = table.args.get(key) + if node is not None: + _collect_node_parts(node, parts) + return ".".join(parts) if parts else "" + + +def _ends_with_table_keyword(before: str) -> bool: + """Check whether *before* ends with a table-introducing keyword. + + Used to determine if a table name appears right after ``FROM``, + ``JOIN``, ``TABLE``, ``INTO``, or ``UPDATE``. 
+ + :param before: Upper-cased SQL text preceding the candidate table name. + :returns: ``True`` if the text ends with a table keyword. + """ + return any(before.endswith(kw) for kw in _TABLE_CONTEXT_KEYWORDS) + + +def _is_in_comma_list_after_keyword(before: str) -> bool: + """Check whether a comma-preceded name belongs to a table list. + + Looks backward for the nearest table-introducing keyword (e.g. ``FROM``) + and verifies that no interrupting keyword (e.g. ``WHERE``, ``SELECT``) + appears between it and the comma. This handles multi-table ``FROM`` + clauses. + + .. code-block:: sql + + SELECT * FROM t1, t2, t3 -- t2 and t3 are in comma list after FROM + + :param before: Upper-cased SQL text preceding the comma + candidate name. + :returns: ``True`` if the name is part of a comma-separated table list. + """ + best_kw_pos = -1 + for kw in _TABLE_CONTEXT_KEYWORDS: + kw_pos = before.rfind(kw) + if kw_pos > best_kw_pos: + best_kw_pos = kw_pos + if best_kw_pos < 0: + # no table keyword found at all + return False + between = before[best_kw_pos:] + # e.g. FROM t1 WHERE ... , x — WHERE interrupts, so x is not a table + return not any(ik in between for ik in _INTERRUPTING_KEYWORDS) + + +#: SQL keywords that introduce a table-name context. +_TABLE_CONTEXT_KEYWORDS = {"FROM", "JOIN", "TABLE", "INTO", "UPDATE"} + +#: Keywords that interrupt a comma-separated table list. +_INTERRUPTING_KEYWORDS = {"SELECT", "WHERE", "ORDER", "GROUP", "HAVING", "SET"} + + +# --------------------------------------------------------------------------- +# TableExtractor class +# --------------------------------------------------------------------------- + + +class TableExtractor: + """Extract table names and aliases from a sqlglot AST. + + Encapsulates the raw SQL string and AST needed for position-based + table sorting, bracket-mode detection, and CTE name filtering. + + The extraction pipeline: + + 1. Collect all ``exp.Table`` nodes from the AST. + 2. 
Build fully-qualified names (with bracket preservation for TSQL). + 3. Filter out CTE names so only real tables are reported. + 4. Sort by first occurrence in the raw SQL for left-to-right order. + + :param ast: Root AST node produced by sqlglot. + :param raw_sql: Original SQL string, used for position-based sorting. + :param cte_names: Set of CTE names to exclude from the result. + :param dialect: The dialect used to parse the AST. + """ + + def __init__( + self, + ast: exp.Expression, + raw_sql: str = "", + cte_names: set[str] | None = None, + dialect: DialectType = None, + ): + self._ast = ast + self._raw_sql = raw_sql + self._upper_sql = raw_sql.upper() + self._cte_names = cte_names or set() + + from sql_metadata.dialect_parser import BracketedTableDialect + + self._bracket_mode = isinstance(dialect, type) and issubclass( + dialect, BracketedTableDialect + ) + self._cached_table_nodes: list[exp.Table] | None = None + + # ------------------------------------------------------------------- + # Public API + # ------------------------------------------------------------------- + + def extract(self) -> list[str]: + """Extract table names, excluding CTE definitions. + + For ``CREATE TABLE`` statements, the target table is always placed + first in the result regardless of its position in the SQL text. + All other tables are sorted by their first occurrence in the raw + SQL (left-to-right reading order). + + .. code-block:: sql + + SELECT * FROM users JOIN orders ON ... -- → ["users", "orders"] + CREATE TABLE new_t AS SELECT * FROM src -- → ["new_t", "src"] + + :returns: Ordered list of unique table names. + """ + create_target = None + if isinstance(self._ast, exp.Create): + # e.g. CREATE TABLE t AS SELECT ... 
— extract target first + create_target = self._extract_create_target() + + collected = self._collect_all() + collected_sorted = sorted(collected, key=lambda t: self._first_position(t)) + return UniqueList( + [create_target, *collected_sorted] if create_target + else collected_sorted + ) + + def extract_aliases(self, tables: list[str]) -> dict[str, str]: + """Extract table alias mappings from the AST. + + Walks all ``exp.Table`` nodes and maps each alias back to its + fully-qualified table name, but only if the table appears in the + provided *tables* list. + + .. code-block:: sql + + SELECT u.id FROM users u -- → {"u": "users"} + + :param tables: List of known table names (from :meth:`extract`). + :returns: Mapping of ``{alias: table_name}``. + """ + aliases = {} + for table in self._table_nodes(): + alias = table.alias + if not alias: + # e.g. SELECT * FROM users — no alias, skip + continue + full_name = self._table_full_name(table) + if full_name in tables: + aliases[alias] = full_name + + return aliases + + # ------------------------------------------------------------------- + # Collection helpers + # ------------------------------------------------------------------- + + def _extract_create_target(self) -> str | None: + """Extract the target table name from a ``CREATE TABLE`` statement. + + The ``CREATE`` node's ``this`` arg may be a ``Table`` directly or a + ``Schema`` wrapping one — both cases are handled. + + .. code-block:: sql + + CREATE TABLE my_table (id INT) -- → "my_table" + CREATE TABLE my_table AS SELECT * FROM src -- → "my_table" + + :returns: Target table name, or ``None`` if it cannot be determined. + """ + target = self._ast.this + target_table = ( + # e.g. CREATE TABLE t (col INT) — target.this is Schema, find Table inside + target.find(exp.Table) if not isinstance(target, exp.Table) + # e.g. CREATE TABLE t AS SELECT ... 
— target.this is Table directly + else target + ) + name = self._table_full_name(target_table) + return name or None + + def _collect_all(self) -> UniqueList: + """Collect table names from all ``exp.Table`` AST nodes. + + Iterates over every ``exp.Table`` node, builds the full name, and + filters out CTE names so that only real tables are collected. + + .. code-block:: sql + + WITH cte AS (SELECT 1) SELECT * FROM cte, real_table + -- cte is filtered out → collects only "real_table" + + :returns: :class:`UniqueList` of table names (unsorted). + """ + collected = UniqueList() + for table in self._table_nodes(): + full_name = self._table_full_name(table) + if full_name and full_name not in self._cte_names: + # e.g. FROM users — real table, collect it + collected.append(full_name) + # else: e.g. FROM cte_name — CTE reference, skip + return collected + + def _table_nodes(self) -> list[exp.Table]: + """Return all ``exp.Table`` nodes from the AST (cached). + + Uses ``find_all(exp.Table)`` which performs a DFS traversal, finding + tables in subqueries, CTEs, and joins. Results are cached so + repeated calls (from :meth:`extract_aliases`, :meth:`_collect_all`) + don't re-walk the tree. + + :returns: List of ``exp.Table`` AST nodes. + """ + if self._cached_table_nodes is None: + self._cached_table_nodes = list(self._ast.find_all(exp.Table)) + return self._cached_table_nodes + + # ------------------------------------------------------------------- + # Table name construction + # ------------------------------------------------------------------- + + def _table_full_name(self, table: exp.Table) -> str: + """Build a fully-qualified table name from an ``exp.Table`` node. + + In bracket mode (TSQL), delegates to :func:`_bracketed_full_name` to + preserve ``[square bracket]`` quoting. Otherwise, assembles a + dot-joined name from catalog, db, and name parts. Double-dot + notation (``server..table``) is detected from the raw SQL. + + .. 
code-block:: sql + + SELECT * FROM mydb.dbo.users -- → "mydb.dbo.users" + SELECT * FROM [dbo].[Users] -- (TSQL) → "[dbo].[Users]" + SELECT * FROM server..users -- → "server..users" + + :param table: An ``exp.Table`` AST node. + :returns: Fully-qualified table name string. + """ + name = table.name + + if self._bracket_mode: + # e.g. SELECT * FROM [dbo].[Users] — preserve bracket notation + bracketed = _bracketed_full_name(table) + if bracketed: + return bracketed + + # e.g. SELECT * FROM server..table — detect double-dot in raw SQL + has_double_dot = bool(name and f"..{name}" in self._raw_sql) + return _assemble_dotted_name( + table.catalog, table.db, name, preserve_empty=has_double_dot + ) + + # ------------------------------------------------------------------- + # Position detection + # ------------------------------------------------------------------- + + def _first_position(self, name: str) -> int: + """Find the first occurrence of a table name in a table context. + + Position sorting ensures the output order matches the left-to-right + reading order of the SQL. First tries to find the name after a + table-introducing keyword (``FROM``, ``JOIN``, etc.); if not found, + falls back to any whole-word occurrence; if still not found, returns + the SQL length (pushing unknown names to the end). + + .. code-block:: sql + + SELECT * FROM b JOIN a ON ... -- a at pos ~22, b at pos ~14 → [b, a] + + :param name: Table name to locate. + :returns: Character position (0-based), or ``len(sql)`` if not found. + """ + name_upper = name.upper() + + # try 1: find after a table keyword (FROM, JOIN, etc.) + pos = self._find_word_in_table_context(name_upper) + if pos >= 0: + return pos + + # try 2: find as a bare word anywhere in the SQL + pos = self._find_word(name_upper) + return pos if pos >= 0 else len(self._raw_sql) + + def _find_word_in_table_context(self, name_upper: str) -> int: + """Find a table name that appears after a table-introducing keyword. 
+ + Scans all whole-word occurrences of *name_upper* and returns the + position of the first one that is directly preceded by a table + keyword (``FROM``, ``JOIN``, etc.) or is part of a comma-separated + table list following such a keyword. + + .. code-block:: sql + + SELECT t.id FROM users t -- "users" preceded by FROM → match + SELECT * FROM t1, t2 -- "t2" preceded by comma after FROM → match + SELECT users FROM other -- "users" in SELECT list → no match here + + :param name_upper: Upper-cased table name to search for. + :returns: Position of the match, or ``-1`` if not found in table context. + """ + for match in self._word_pattern(name_upper).finditer(self._upper_sql): + pos: int = int(match.start()) + before = self._upper_sql[:pos].rstrip() + if _ends_with_table_keyword(before): + # e.g. FROM users — directly after table keyword + return pos + if before.endswith(",") and _is_in_comma_list_after_keyword(before): + # e.g. FROM t1, t2 — part of comma-separated list + return pos + return -1 + + def _find_word(self, name_upper: str, start: int = 0) -> int: + """Find *name_upper* as a whole word in the upper-cased SQL. + + Uses a cached regex pattern that respects word boundaries and + handles optionally-quoted segments for dotted names. + + :param name_upper: Upper-cased name to search for. + :param start: Position to start searching from. + :returns: Position of the match, or ``-1`` if not found. + """ + match = self._word_pattern(name_upper).search(self._upper_sql, start) + return int(match.start()) if match else -1 + + _pattern_cache: dict[str, re.Pattern[str]] = {} + + # Optional quote wrappers — cover backticks, single/double quotes, and brackets + _OPT_OPEN_QUOTE = r"""[`"'\[]?""" + _OPT_CLOSE_QUOTE = r"""[`"'\]]?""" + + @staticmethod + def _word_pattern(name_upper: str) -> re.Pattern[str]: + """Build a regex matching *name_upper* as a whole word (cached). 
+ + For qualified names (containing dots), each segment may be optionally + wrapped in backticks, single/double quotes, or brackets — so the + pattern for ``SCHEMA.TABLE`` also matches ``"SCHEMA"."TABLE"``, + ``[SCHEMA].[TABLE]``, or ```SCHEMA`.`TABLE```. + + The pattern is compiled once and cached in a class-level dict for + reuse across calls and instances. + + .. code-block:: sql + + SELECT * FROM schema.table -- matched by SCHEMA.TABLE + SELECT * FROM "schema"."table" -- also matched + SELECT * FROM [schema].[table] -- also matched + + :param name_upper: Upper-cased table name (may contain dots). + :returns: Compiled regex pattern with word-boundary assertions. + """ + pat = TableExtractor._pattern_cache.get(name_upper) + if pat is None: + oq = TableExtractor._OPT_OPEN_QUOTE + cq = TableExtractor._OPT_CLOSE_QUOTE + segments = name_upper.split(".") + inner = r"\.".join( + oq + re.escape(seg) + cq for seg in segments + ) + pat = re.compile(r"(? str: # pragma: no cover - """ - Representation - useful for debugging - """ - repr_str = ["=".join([str(k), str(v)]) for k, v in self.__dict__.items()] - return f"SQLToken({','.join(repr_str)})" - - @property - def normalized(self) -> str: - """ - Property returning uppercase value without end lines and spaces - """ - return self.value.translate(str.maketrans("", "", " \n\t\r")).upper() - - @property - def stringified_token(self) -> str: - """ - Returns string representation with whitespace or not - used to rebuild query - from list of tokens - """ - if self.previous_token: - if ( - self.normalized in [")", ".", ","] - or self.previous_token.normalized in ["(", "."] - or ( - self.is_left_parenthesis - and self.previous_token.normalized - not in RELEVANT_KEYWORDS.union({"*", ",", "IN", "NOTIN"}) - ) - ): - return str(self) - return f" {self}" - return str(self) # pragma: no cover - - @property - def last_keyword_normalized(self) -> str: - """ - Property returning uppercase last keyword without end lines and spaces - """ - 
if self.last_keyword: - return self.last_keyword.translate(str.maketrans("", "", " \n\t\r")).upper() - return "" - - @property - def is_in_parenthesis(self) -> bool: - """ - Property checks if token is surrounded with brackets () - """ - return self.parenthesis_level > 0 - - @property - def is_create_table_columns_definition(self) -> bool: - """ - Checks if given token is inside columns definition in - create table query like: create table name () - """ - open_parenthesis = self.find_nearest_token( - True, value_attribute="is_create_table_columns_declaration_start" - ) - if open_parenthesis is EmptyToken: - return False - close_parenthesis = self.find_nearest_token( - True, - direction="right", - value_attribute="is_create_table_columns_declaration_end", - ) - return ( - open_parenthesis is not EmptyToken and close_parenthesis is not EmptyToken - ) - - @property - def is_keyword_column_name(self) -> bool: - """ - Checks if given keyword can be a column name in SELECT query - """ - return ( - self.is_keyword - and self.normalized not in RELEVANT_KEYWORDS - and self.previous_token.normalized in [",", "SELECT"] - and self.next_token.normalized in [",", "AS", "FROM"] - ) - - @property - def is_alias_without_as(self) -> bool: - """ - Checks if a given token is an alias without as keyword, - like: SELECT col , col2 from table - """ - return ( - self.next_token.normalized in [",", "FROM"] - and self.previous_token.normalized not in ["*", ",", ".", "(", "SELECT"] - and not self.previous_token.is_keyword - and ( - self.last_keyword_normalized == "SELECT" - or self.previous_token.is_column_definition_end - or self.previous_token.is_partition_clause_end - ) - and not self.previous_token.is_comment - ) - - @property - def is_alias_definition(self): - """ - Returns if current token is a definition of an alias. - Note that aliases can also be used in other queries and be a part - of other nested columns with aliases. 
- - Note that this function only check if alias token is a token with - alias definition, it's not suitable for determining IF token is an alias - as it's more complicated and this method would match - also i.e. sub-queries names - """ - return ( - self.is_alias_without_as - or self.previous_token.normalized == "AS" - or self.is_in_with_columns - ) - - @property - def is_alias_of_self(self) -> bool: - """ - Checks if a given token is an alias but at the same time - is also an alias of self, so not really an alias - """ - - end_of_column = self.find_nearest_token( - [",", "FROM"], value_attribute="normalized", direction="right" - ) - while end_of_column.is_in_nested_function: - end_of_column = end_of_column.find_nearest_token( - [",", "FROM"], value_attribute="normalized", direction="right" - ) - return end_of_column.previous_token.normalized == self.normalized - - @property - def is_in_with_columns(self) -> bool: - """ - Checks if token is inside with colums part of a query - """ - return ( - self.find_nearest_token("(").is_with_columns_start - and self.find_nearest_token(")", direction="right").is_with_columns_end - ) - - @property - def is_wildcard_not_operator(self): - """ - Determines if * encountered in query is a wildcard like select <*> from aa - or is that an operator like Select aa <*> bb as cc from dd - """ - return self.normalized == "*" and ( - self.previous_token.value in [",", ".", "SELECT"] - or (self.previous_token.value == "(") - and self.next_token.value == ")" - ) - - @property - def is_potential_table_name(self) -> bool: - """ - Checks if token is a possible candidate for table name - """ - return ( - (self.is_name or self.is_keyword) - and self.last_keyword_normalized in TABLE_ADJUSTMENT_KEYWORDS - and self.previous_token.normalized not in ["AS", "WITH"] - and self.normalized - not in ["AS", "SELECT", "IF", "SET", "WITH", "IFNOTEXISTS"] - ) - - @property - def is_with_statement_nested_in_subquery(self) -> bool: - """ - Checks if token is with 
statement nested in subquery - """ - return ( - self.normalized == "WITH" - and self.previous_token.is_left_parenthesis - and self.get_nth_previous(2).normalized == "FROM" - ) - - @property - def is_alias_of_table_or_alias_of_subquery(self) -> bool: - """ - Checks if token is alias of table or alias of subquery - - It's not a list of tables, e.g. SELECT * FROM foo, bar - hence, it can be the case of alias without AS, e.g. SELECT * FROM foo bar - or an alias of subquery (SELECT * FROM foo) bar - """ - is_alias_without_as = ( - self.previous_token.normalized != self.last_keyword_normalized - and not self.previous_token.is_punctuation - and not self.previous_token.normalized == "IFNOTEXISTS" - ) - return is_alias_without_as or self.previous_token.is_right_parenthesis - - @property - def is_a_wildcard_in_select_statement(self) -> bool: - """ - Checks if token is a wildcard in select statement - - Handle * wildcard in select part, but ignore count(*) - """ - return ( - self.is_wildcard - and self.last_keyword_normalized == "SELECT" - and not self.previous_token.is_left_parenthesis - ) - - @property - def is_potential_column_name(self) -> bool: - """ - Checks if token is a potential column name - """ - return ( - self.last_keyword_normalized in KEYWORDS_BEFORE_COLUMNS - and self.previous_token.normalized not in ["AS", ")"] - and not self.is_alias_without_as - ) - - @property - def is_conversion_specifier(self) -> bool: - """ - Checks if token is a format or data type in cast or convert - """ - return ( - self.previous_token.normalized in ["AS", "USING"] - and self.is_in_nested_function - ) - - @property - def is_column_name_inside_insert_clause(self) -> bool: - """ - Checks if token is a column name inside insert clause, - e.g. INSERT INTO `foo` (col1, `col2`) VALUES (..) 
- """ - return ( - self.last_keyword_normalized == "INTO" - and self.previous_token.is_punctuation - ) - - @property - def is_potential_alias(self) -> bool: - """ - Checks if given token can possibly be an alias - """ - return self.is_name or ( - self.is_keyword - and self.previous_token.normalized == "AS" - and self.last_keyword_normalized == "SELECT" - ) - - @property - def is_a_valid_alias(self) -> bool: - """ - Checks if given token meets the alias criteria - """ - return ( - self.last_keyword_normalized in KEYWORDS_BEFORE_COLUMNS - and self.normalized not in ["DIV"] - and self.is_alias_definition - and not self.is_in_nested_function - or self.is_in_with_columns - ) - - @property - def next_token_not_comment(self): - """ - Property returning next non-comment token - """ - if self.next_token and self.next_token.is_comment: - return self.next_token.next_token_not_comment - return self.next_token - - @property - def previous_token_not_comment(self): - """ - Property returning previous non-comment token - """ - if self.previous_token and self.previous_token.is_comment: - return self.previous_token.previous_token_not_comment - return self.previous_token - - def is_constraint_definition_inside_create_table_clause( - self, query_type: str - ) -> bool: - """ - Checks if token is constraint definition inside create table clause - - Used to handle CREATE TABLE queries (#35) to skip keyword that are withing - parenthesis-wrapped list of column - """ - return ( - query_type == QueryType.CREATE.value - and self.is_in_parenthesis - and self.is_create_table_columns_definition - ) - - def is_columns_alias_of_with_query_or_column_in_insert_query( - self, with_names: List[str] - ) -> bool: - """ - Check if token is column alias of with query or column in insert query - - We are in of INSERT INTO
(), - or columns of with statement: with () as ... - """ - return self.is_in_parenthesis and ( - self.find_nearest_token("(").previous_token.value in with_names - or self.last_keyword_normalized == "INTO" - ) - - def is_sub_query_alias(self, subqueries_names: List[str]) -> bool: - """ - Checks for aliases of sub-queries i.e.: SELECT from (...) - """ - return ( - self.previous_token.is_right_parenthesis and self.value in subqueries_names - ) - - def is_with_query_name(self, with_names: List[str]) -> bool: - """ - checks for names of the with queries as (subquery) - """ - return self.next_token.normalized == "AS" and self.value in with_names - - def is_sub_query_name_or_with_name_or_function_name( - self, sub_queries_names: List[str], with_names: List[str] - ) -> bool: - """ - Check for non applicable names: with, subquery or custom function - """ - return ( - self.is_sub_query_alias(subqueries_names=sub_queries_names) - or self.is_with_query_name(with_names=with_names) - or self.next_token.is_left_parenthesis - ) - - def is_not_an_alias_or_is_self_alias_outside_of_subquery( - self, columns_aliases_names: List[str], max_subquery_level: Dict - ) -> bool: - """ - Checks if token is not alias or alias of self outside of sub query - """ - return ( - self.value not in columns_aliases_names - or self.token_is_alias_of_self_not_from_subquery( - aliases_levels=max_subquery_level - ) - or self.token_name_is_same_as_alias_not_from_subquery( - aliases_levels=max_subquery_level - ) - ) - - def is_table_definition_suffix_in_non_select_create_table( - self, query_type: str - ) -> bool: - """ - Checks if we are after create table definition. - - Ignore annotations outside the parenthesis with the list of columns - e.g. 
) CHARACTER SET utf8; - """ - return ( - query_type == QueryType.CREATE - and not self.is_in_parenthesis - and self.find_nearest_token("SELECT", value_attribute="normalized") - is EmptyToken - ) - - def is_column_definition_inside_create_table(self, query_type: str) -> bool: - """ - Checks for column names in create table - - Previous token is either ( or , -> indicates the column name - """ - return ( - query_type == QueryType.CREATE - and self.is_in_parenthesis - and self.previous_token.is_punctuation - and self.last_keyword_normalized == "TABLE" - ) - - def is_potential_column_alias( - self, columns_aliases_names: List[str], column_aliases: Dict - ) -> bool: - """ - Checks if column can be an alias - """ - return ( - self.value in columns_aliases_names - and self.value not in column_aliases - and not self.previous_token.is_nested_function_start - and self.is_alias_definition - ) - - def token_is_alias_of_self_not_from_subquery(self, aliases_levels: Dict) -> bool: - """ - Checks if token is also an alias, but is an alias of self that is not - coming from a subquery, that means it's a valid column - """ - return ( - self.last_keyword_normalized == "SELECT" - and self.is_alias_of_self - and self.subquery_level == aliases_levels[self.value] - ) - - def token_name_is_same_as_alias_not_from_subquery( - self, aliases_levels: Dict - ) -> bool: - """ - Checks if token is also an alias, but is an alias of self that is not - coming from a subquery, that means it's a valid column - """ - return ( - self.last_keyword_normalized == "SELECT" - and self.next_token.normalized == "AS" - and self.subquery_level == aliases_levels[self.value] - ) - - def table_prefixed_column(self, table_aliases: Dict) -> str: - """ - Substitutes table alias with actual table name - """ - value = self.value - if "." 
in value: - parts = value.split(".") - if len(parts) > 4: # pragma: no cover - raise ValueError(f"Wrong columns name: {value}") - parts[0] = table_aliases.get(parts[0], parts[0]) - value = ".".join(parts) - return value - - def get_nth_previous(self, level: int) -> "SQLToken": - """ - Function iterates previous tokens getting nth previous token - """ - assert level >= 1 - if self.previous_token: - if level > 1: - return self.previous_token.get_nth_previous(level=level - 1) - return self.previous_token - return EmptyToken # pragma: no cover - - def find_nearest_token( - self, - value: Union[Union[str, bool], List[Union[str, bool]]], - direction: str = "left", - value_attribute: str = "value", - ) -> "SQLToken": - """ - Returns token with given value to the left or right. - If value is not found it returns EmptyToken. - """ - if not isinstance(value, list): - value = [value] - attribute = "previous_token" if direction == "left" else "next_token" - token = self - while getattr(token, attribute): - tok_value = getattr(getattr(token, attribute), value_attribute) - if tok_value in value: - return getattr(token, attribute) - token = getattr(token, attribute) - return EmptyToken - - -EmptyToken = SQLToken() # pylint: disable=invalid-name diff --git a/sql_metadata/utils.py b/sql_metadata/utils.py index ccde60a4..df6d0453 100644 --- a/sql_metadata/utils.py +++ b/sql_metadata/utils.py @@ -1,35 +1,59 @@ -""" -Module with various utils +"""Utility classes and functions shared across the sql-metadata package. + +Provides ``UniqueList``, a deduplicating list used to collect columns, +tables, aliases, and CTE names while preserving insertion order, and +a ``last_segment`` helper for qualified name handling. """ -from typing import Any, List, Sequence +from typing import Any, Iterable +#: Placeholder used to encode dots in qualified CTE names so that sqlglot +#: does not misinterpret ``db.cte_name`` as a table reference. 
+DOT_PLACEHOLDER = "__DOT__" -class UniqueList(list): - """ - List that keeps it's items unique + +class UniqueList(list[str]): + """A list subclass that silently rejects duplicate items. + + Used throughout the extraction pipeline (``_extract.py``, ``parser.py``) + to collect columns, tables, aliases, CTE names, and subquery names while + guaranteeing uniqueness and preserving first-insertion order. Maintains + an internal ``set`` for O(1) membership checks. """ + def __init__(self, iterable: Any = None, **kwargs: Any) -> None: + self._seen: set[str] = set() + if iterable is not None: + super().__init__(**kwargs) + self.extend(iterable) + else: + super().__init__(**kwargs) + self._seen = set(self) + def append(self, item: Any) -> None: - if item not in self: + """Append *item* only if it is not already present (O(1) check).""" + if item not in self._seen: + self._seen.add(item) super().append(item) - def extend(self, items: Sequence[Any]) -> None: + def extend(self, items: Iterable[Any]) -> None: # type: ignore[override] + """Extend the list with *items*, skipping duplicates.""" for item in items: self.append(item) - def __sub__(self, other) -> List: - return [x for x in self if x not in other] + def __contains__(self, item: Any) -> bool: + """O(1) membership check using the internal set.""" + return item in self._seen + + def __sub__(self, other: Any) -> list[str]: + """Return a plain list of elements in *self* that are not in *other*.""" + other_set = set(other) + return [x for x in self if x not in other_set] + + + +def last_segment(name: str) -> str: + """Return the last dot-separated segment of a qualified name.""" + return name.rsplit(".", 1)[-1] -def flatten_list(input_list: List) -> List[str]: - """ - Flattens list of string and lists if there are nested lists. 
- """ - result = [] - for item in input_list: - if isinstance(item, list): - result.extend(flatten_list(item)) - else: - result.append(item) - return result diff --git a/test/test_aliases.py b/test/test_aliases.py index 1d822fde..97d0d656 100644 --- a/test/test_aliases.py +++ b/test/test_aliases.py @@ -16,9 +16,9 @@ def test_get_query_table_aliases(): assert Parser( "SELECT bar AS value FROM foo AS f INNER JOIN dimensions AS d ON f.id = d.id" ).tables_aliases == {"f": "foo", "d": "dimensions"} - assert ( - Parser("SELECT e.foo FROM (SELECT * FROM bar) AS e").tables_aliases == {} - ), "Sub-query aliases are ignored" + assert Parser("SELECT e.foo FROM (SELECT * FROM bar) AS e").tables_aliases == {}, ( + "Sub-query aliases are ignored" + ) assert Parser( "SELECT a.* FROM product_a AS a " "JOIN product_b AS b ON a.ip_address = b.ip_address" @@ -44,3 +44,27 @@ def test_tables_aliases_are_resolved(): "users1.ip_address", "users2.ip_address", ] + + +def test_column_alias_same_as_join_table_alias(): + # solved: https://github.com/macbre/sql-metadata/issues/424 + query = """ + SELECT + dependent_schema.name as dependent_schema, + relationships.dependent_name as dependent_name + FROM relationships + JOIN schema AS dependent_schema + ON relationships.dependent_schema_id = dependent_schema.id + JOIN schema AS referenced_schema + ON relationships.referenced_schema_id = referenced_schema.id + GROUP BY dependent_schema, dependent_name + ORDER BY dependent_schema, dependent_name + """ + parser = Parser(query) + assert parser.tables == ["relationships", "schema"] + assert parser.tables_aliases == { + "dependent_schema": "schema", + "referenced_schema": "schema", + } + assert "schema.name" in parser.columns + assert "relationships.dependent_name" in parser.columns diff --git a/test/test_alter.py b/test/test_alter.py index 572dba2c..69c8188a 100644 --- a/test/test_alter.py +++ b/test/test_alter.py @@ -11,3 +11,9 @@ def test_alter_table_indices_index(): parser = Parser("ALTER TABLE 
foo_table ADD INDEX `idx_foo` (`bar`);") assert parser.query_type == QueryType.ALTER assert parser.tables == ["foo_table"] + + +def test_alter_table_add_column(): + """ALTER TABLE ADD COLUMN is parsed correctly.""" + p = Parser("ALTER TABLE t ADD COLUMN new_col INT") + assert p.query_type == "ALTER TABLE" diff --git a/test/test_column_aliases.py b/test/test_column_aliases.py index d0a1d336..bf6e4d06 100644 --- a/test/test_column_aliases.py +++ b/test/test_column_aliases.py @@ -24,15 +24,10 @@ def test_column_aliases_with_subquery(): """ parser = Parser(query) assert parser.tables == ["data_contracts_report"] - assert parser.subqueries_names == ["sq2", "sq"] - assert parser.subqueries == { - "sq": "SELECT count(C2) as C2Count, BusinessSource, yearweek(Start1) Start1, " - "yearweek(End1) End1 from (SELECT ContractID as C2, BusinessSource, " - "StartDate as Start1, EndDate as End1 from data_contracts_report) sq2 " - "group by 2, 3, 4", - "sq2": "SELECT ContractID as C2, BusinessSource, StartDate as Start1, EndDate " - "as End1 from data_contracts_report", - } + assert parser.subqueries_names == ["sq2", "sq", "subquery_1"] + assert "sq" in parser.subqueries + assert "sq2" in parser.subqueries + assert "subquery_1" in parser.subqueries assert parser.columns == [ "SignDate", "BusinessSource", diff --git a/test/test_comments.py b/test/test_comments.py index 9a93bb5a..16db5f99 100644 --- a/test/test_comments.py +++ b/test/test_comments.py @@ -155,61 +155,37 @@ def test_inline_comments_with_hash(): assert parser.comments == [] -def test_next_token_not_comment_single(): - query = """ - SELECT column_1 -- comment_1 - FROM table_1 - """ +def test_without_comments_for_multiline_query(): + query = """SELECT * -- comment + FROM table + WHERE table.id = '123'""" parser = Parser(query) - column_1_tok = parser.tokens[1] - - assert column_1_tok.next_token.is_comment - assert not column_1_tok.next_token_not_comment.is_comment - assert column_1_tok.next_token.next_token == 
column_1_tok.next_token_not_comment + assert parser.without_comments == """SELECT * FROM table WHERE table.id = '123'""" -def test_next_token_not_comment_multiple(): - query = """ - SELECT column_1 -- comment_1 - - /* - comment_2 - */ - - # comment_3 - FROM table_1 - """ +def test_table_after_comment_not_ignored(): + # solved: https://github.com/macbre/sql-metadata/issues/251 + query = """SELECT c1 FROM + --Comment-- + d1, d2, d3""" parser = Parser(query) - column_1_tok = parser.tokens[1] + assert parser.tables == ["d1", "d2", "d3"] + assert parser.columns == ["c1"] + assert parser.columns_dict == {"select": ["c1"]} - assert column_1_tok.next_token.is_comment - assert column_1_tok.next_token.next_token.is_comment - assert column_1_tok.next_token.next_token.next_token.is_comment - assert not column_1_tok.next_token_not_comment.is_comment - assert ( - column_1_tok.next_token.next_token.next_token.next_token - == column_1_tok.next_token_not_comment - ) +def test_extract_comments_empty_string(): + """Extracting comments from empty SQL returns empty list.""" + assert Parser("").comments == [] -def test_next_token_not_comment_on_non_comments(): - query = """ - SELECT column_1 - FROM table_1 - """ - parser = Parser(query) - select_tok = parser.tokens[0] - assert select_tok.next_token == select_tok.next_token_not_comment - assert ( - select_tok.next_token.next_token - == select_tok.next_token_not_comment.next_token_not_comment - ) +def test_strip_comments_empty_string(): + """Stripping comments from empty SQL returns empty string.""" + assert Parser("").without_comments == "" -def test_without_comments_for_multiline_query(): - query = """SELECT * -- comment - FROM table - WHERE table.id = '123'""" - parser = Parser(query) - assert parser.without_comments == """SELECT * FROM table WHERE table.id = '123'""" +def test_strip_comments_for_parsing_empty(): + """SqlCleaner handles empty strings via strip_comments_for_parsing.""" + from sql_metadata.comments import 
strip_comments_for_parsing + + assert strip_comments_for_parsing("") == "" diff --git a/test/test_compat.py b/test/test_compat.py deleted file mode 100644 index 3883774f..00000000 --- a/test/test_compat.py +++ /dev/null @@ -1,61 +0,0 @@ -from sqlparse.tokens import Punctuation, Wildcard - -from sql_metadata.compat import ( - get_query_columns, - get_query_tables, - get_query_limit_and_offset, - generalize_sql, - preprocess_query, - get_query_tokens, -) - - -def test_get_query_columns(): - assert ["*"] == get_query_columns("SELECT * FROM `test_table`") - assert ["foo", "id"] == get_query_columns( - "SELECT foo, count(*) as bar FROM `test_table` WHERE id = 3" - ) - - -def test_get_query_tables(): - assert ["test_table"] == get_query_tables("SELECT * FROM `test_table`") - assert ["test_table", "second_table"] == get_query_tables( - "SELECT foo FROM test_table, second_table WHERE id = 1" - ) - - -def test_get_query_limit_and_offset(): - assert (200, 927600) == get_query_limit_and_offset( - "SELECT * FOO foo LIMIT 927600,200" - ) - - -def test_generalize_sql(): - assert generalize_sql() is None - assert "SELECT * FROM foo;" == generalize_sql("SELECT * FROM foo;") - assert "SELECT * FROM foo WHERE id = N" == generalize_sql( - "SELECT * FROM foo WHERE id = 123" - ) - assert "SELECT test FROM foo" == generalize_sql("SELECT /* foo */ test FROM foo") - - -def test_preprocess_query(): - assert "SELECT * FROM foo WHERE id = 123" == preprocess_query( - "SELECT * FROM foo WHERE id = 123" - ) - assert "SELECT /* foo */ test FROM `foo`.`bar`" == preprocess_query( - "SELECT /* foo */ test\nFROM `foo`.`bar`" - ) - - -def test_get_query_tokens(): - tokens = get_query_tokens("SELECT * FROM foo;") - assert len(tokens) == 5 - - assert tokens[0].normalized == "SELECT" - assert tokens[1].ttype is Wildcard - assert tokens[2].normalized == "FROM" - assert tokens[3].normalized == "foo" - assert tokens[4].ttype is Punctuation - - assert [] == get_query_tokens("") diff --git 
a/test/test_create_table.py b/test/test_create_table.py index 6c065d75..2e5d1fec 100644 --- a/test/test_create_table.py +++ b/test/test_create_table.py @@ -1,11 +1,11 @@ import pytest -from sql_metadata import Parser +from sql_metadata import InvalidQueryDefinition, Parser from sql_metadata.keywords_lists import QueryType def test_is_create_table_query(): - with pytest.raises(ValueError): + with pytest.raises(InvalidQueryDefinition): assert Parser("BEGIN").query_type assert Parser("SELECT * FROM `foo` ()").query_type == QueryType.SELECT @@ -78,7 +78,7 @@ def test_creating_table_as_select_with_with_clause(): parser = Parser(qry) assert parser.query_type == QueryType.CREATE assert parser.with_names == ["sub"] - assert parser.with_queries == {"sub": "select it_id from internal_table"} + assert parser.with_queries == {"sub": "SELECT it_id FROM internal_table"} assert parser.columns == [ "it_id", "*", @@ -170,3 +170,75 @@ def test_create_temporary_table(): assert parser.query_type == QueryType.CREATE assert parser.tables == ["new_tbl", "orig_tbl"] assert parser.columns == ["*"] + + +def test_create_index_extracts_table(): + """CREATE INDEX correctly extracts the target table.""" + p = Parser("CREATE INDEX idx ON t (col)") + assert "t" in p.tables + + +def test_create_table_with_columns_only(): + """CREATE TABLE with column definitions (no SELECT) extracts columns.""" + p = Parser("CREATE TABLE users (id INT, name VARCHAR(100), active BOOL)") + assert p.columns == ["id", "name", "active"] + assert p.tables == ["users"] + + +def test_create_table_with_column_defs_and_select(): + """CREATE TABLE with both column definitions and AS SELECT.""" + p = Parser("CREATE TABLE t (id INT) AS SELECT a FROM t2") + assert p.columns == ["id", "a"] + assert p.tables == ["t", "t2"] + + +def test_ctas_with_redshift_distkey_sortkey(): + # Solved: https://github.com/macbre/sql-metadata/issues/367 + p = Parser( + "CREATE TABLE my_table distkey(col1) sortkey(col1, col3) " + "AS SELECT col1, 
col2, col3 FROM source_table" + ) + assert p.tables == ["my_table", "source_table"] + assert p.columns == ["col1", "col2", "col3"] + + +def test_create_table_mysql_charset_and_collate(): + # Solved: https://github.com/macbre/sql-metadata/issues/358 + p = Parser("""CREATE TABLE `jeecg_order_main` ( + `id` varchar(32) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL, + `order_code` varchar(50) CHARACTER SET utf8 COLLATE utf8_general_ci NULL, + `order_date` datetime NULL DEFAULT NULL, + `order_money` double(10, 3) NULL DEFAULT NULL, + `bpm_status` varchar(3) CHARACTER SET utf8 COLLATE utf8_general_ci NULL, + PRIMARY KEY (`id`) USING BTREE + ) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci""") + assert p.tables == ["jeecg_order_main"] + assert p.columns == ["id", "order_code", "order_date", "order_money", "bpm_status"] + + +def test_create_table_with_comments_and_keyword_columns(): + # Solved: https://github.com/macbre/sql-metadata/issues/507 + p = Parser(""" + CREATE TABLE accounts ( + id INTEGER, /* comment */ + username TEXT UNIQUE, + status TEXT, + online_at INTEGER, + hash TEXT UNIQUE, + uid TEXT UNIQUE, + test INTEGER, + usage INTEGER, + PRIMARY KEY (id) + ) + """) + assert p.tables == ["accounts"] + assert p.columns == [ + "id", "username", "status", "online_at", "hash", "uid", "test", "usage" + ] + + +def test_create_table_as_select_with_cte_same_name(): + """CREATE TABLE target should be reported even when a CTE shares its name.""" + query = "CREATE TABLE foo AS WITH foo AS (SELECT 1 as id) SELECT * FROM foo" + parser = Parser(query) + assert parser.tables == ["foo"] diff --git a/test/test_edge_cases.py b/test/test_edge_cases.py new file mode 100644 index 00000000..9e685ef3 --- /dev/null +++ b/test/test_edge_cases.py @@ -0,0 +1,23 @@ +"""Edge-case tests for internals not covered by feature-specific test files.""" + +from sql_metadata.sql_cleaner import SqlCleaner +from sql_metadata.utils import UniqueList + + +def 
test_unique_list_subtraction(): + """UniqueList.__sub__ returns elements not present in the other list.""" + ul = UniqueList(["a", "b", "c", "d"]) + result = ul - ["b", "d"] + assert result == ["a", "c"] + + +def test_unique_list_deduplicates_on_init(): + """UniqueList removes duplicates when constructed from an iterable.""" + ul = UniqueList(["x", "y", "x", "z", "y"]) + assert list(ul) == ["x", "y", "z"] + + +def test_clean_empty_after_paren_strip(): + """SQL that becomes empty after outer-paren stripping.""" + result = SqlCleaner.clean("(())") + assert result.sql is None diff --git a/test/test_getting_columns.py b/test/test_getting_columns.py index 902e4e51..8ebad638 100644 --- a/test/test_getting_columns.py +++ b/test/test_getting_columns.py @@ -113,6 +113,52 @@ def test_columns_with_order_by(): "foo", "id", ] + # Star inside COUNT(*) in ORDER BY should not be extracted as a column + assert Parser( + "SELECT dept FROM employees GROUP BY dept ORDER BY COUNT(*) DESC" + ).columns == ["dept"] + + +def test_output_columns(): + # Solved: https://github.com/macbre/sql-metadata/issues/468 + parser = Parser("""SELECT + dj.field_1, + cardinality(dj.field_1) as field_1_count, + dj.field_2, + cardinality(dj.field_2) as field_2_count, + dj.field_3 as field_3 + FROM dj""") + assert parser.output_columns == [ + "dj.field_1", "field_1_count", "dj.field_2", "field_2_count", "field_3" + ] + + # Simple alias + assert Parser("SELECT a, b AS c FROM t").output_columns == ["a", "c"] + + # Star + assert Parser("SELECT * FROM t").output_columns == ["*"] + + # Self-alias preserves original name + assert Parser("SELECT a AS a FROM t").output_columns == ["a"] + + # Non-SELECT query returns empty list + assert Parser("CREATE TABLE t (id INT)").output_columns == [] + + # Solved: https://github.com/macbre/sql-metadata/issues/421 + # Window function alias resolved in output_columns + parser = Parser("""SELECT + DATE_TRUNC('month', o.order_date) AS month, + c.customer_id, + SUM(oi.quantity * 
oi.unit_price) AS revenue, + ROW_NUMBER() OVER (PARTITION BY c.customer_id + ORDER BY SUM(oi.quantity * oi.unit_price) DESC) AS revenue_rank + FROM orders o + JOIN customers c ON o.customer_id = c.customer_id + JOIN order_items oi ON o.order_id = oi.order_id""") + assert parser.output_columns == [ + "month", "customers.customer_id", "revenue", "revenue_rank" + ] + assert "revenue_rank" in parser.columns_aliases def test_update_and_replace(): @@ -304,6 +350,24 @@ def test_columns_and_sql_functions(): ).columns == ["col", "col2", "col3", "col4", "col5"] +def test_odbc_escape_function(): + # Solved: https://github.com/macbre/sql-metadata/issues/391 + parser = Parser( + "SELECT Calendar_year_lookup.Yr, " + "{fn concat('Q', Calendar_year_lookup.Qtr)}, " + "sum(Shop_facts.Amount_sold) " + "FROM Calendar_year_lookup, Shop_facts " + "GROUP BY Calendar_year_lookup.Yr, " + "{fn concat('Q', Calendar_year_lookup.Qtr)}" + ) + assert parser.tables == ["Calendar_year_lookup", "Shop_facts"] + assert parser.columns == [ + "Calendar_year_lookup.Yr", + "Calendar_year_lookup.Qtr", + "Shop_facts.Amount_sold", + ] + + def test_columns_starting_with_keywords(): query = """ SELECT `schema_name`, full_table_name, `column_name`, `catalog_name`, @@ -533,7 +597,7 @@ def test_double_inner_join(): parser = Parser(query) assert "loan.account_id" in parser.columns - assert parser.tables == ["loan", "account"] + assert parser.tables == ["loan", "account", "district"] def test_keyword_column_source(): @@ -555,3 +619,157 @@ def test_keyword_column_source(): # Test with 'source' as only column parser = Parser("select source from my_table") assert parser.columns == ["source"] + + +def test_sum_case_when_columns(): + # solved: https://github.com/macbre/sql-metadata/issues/579 + query = """ + SELECT CAST( + SUM(CASE WHEN segment = 'Premium' THEN 1 ELSE 0 END) AS REAL) * 100 / + COUNT(*) AS premiumpercentage + FROM gasstations WHERE country = 'SVK'""" + parser = Parser(query) + assert parser.columns == 
["segment", "country"] + assert parser.columns_dict == {"select": ["segment"], "where": ["country"]} + assert parser.tables == ["gasstations"] + + +def test_quoted_column_with_whitespace(): + # solved: https://github.com/macbre/sql-metadata/issues/578 + query = ( + """SELECT COUNT(*) FROM examination WHERE "Examination Date" > '1997-01-01'""" + ) + parser = Parser(query) + assert parser.columns == ["Examination Date"] + assert parser.columns_dict == {"where": ["Examination Date"]} + assert parser.tables == ["examination"] + + +def test_coalesce_in_joins(): + # solved: https://github.com/macbre/sql-metadata/issues/559 + query = """ + select OPR.ID, OPR.year from operations OPR + INNER JOIN my_db_name.ipps_wage_index_annual WI ON OPR.year = WI.cms_year + INNER JOIN my_db_name.geo_county_cbsa CBS + ON WI.cbsa_cd = COALESCE(CBS.metropolitan_division_code, CBS.cbsa_code, SUBSTRING(CBS.ssa_codes, 1, 2))""" + parser = Parser(query) + assert parser.columns == [ + "operations.ID", + "operations.year", + "my_db_name.ipps_wage_index_annual.cms_year", + "my_db_name.ipps_wage_index_annual.cbsa_cd", + "my_db_name.geo_county_cbsa.metropolitan_division_code", + "my_db_name.geo_county_cbsa.cbsa_code", + "my_db_name.geo_county_cbsa.ssa_codes", + ] + assert parser.columns_dict == { + "join": [ + "operations.year", + "my_db_name.ipps_wage_index_annual.cms_year", + "my_db_name.ipps_wage_index_annual.cbsa_cd", + "my_db_name.geo_county_cbsa.metropolitan_division_code", + "my_db_name.geo_county_cbsa.cbsa_code", + "my_db_name.geo_county_cbsa.ssa_codes", + ], + "select": ["operations.ID", "operations.year"], + } + assert parser.tables == [ + "operations", + "my_db_name.ipps_wage_index_annual", + "my_db_name.geo_county_cbsa", + ] + + +def test_uid_pad_parsed_as_columns(): + # solved: https://github.com/macbre/sql-metadata/issues/412 + parser = Parser("SELECT * FROM t1 WHERE uid = 4") + assert parser.tables == ["t1"] + assert parser.columns == ["*", "uid"] + assert parser.columns_dict == 
{"select": ["*"], "where": ["uid"]} + + parser2 = Parser("SELECT * FROM t1 WHERE pad = 4") + assert parser2.tables == ["t1"] + assert parser2.columns == ["*", "pad"] + assert parser2.columns_dict == {"select": ["*"], "where": ["pad"]} + + +def test_dateadd_unit_not_column(): + # solved: https://github.com/macbre/sql-metadata/issues/411 + query = """ + SELECT + dateadd(dd, 30, DateReleased), + dateadd(WK, 2, DateReleased) + FROM test a + """ + parser = Parser(query) + assert parser.tables == ["test"] + assert parser.columns == ["DateReleased"] + assert parser.tables_aliases == {"a": "test"} + assert parser.columns_dict == {"select": ["DateReleased"]} + + +def test_backtick_column_with_operation(): + # solved: https://github.com/macbre/sql-metadata/issues/448 + query = "SELECT `col1 with space` / `col2_anything` FROM table1" + parser = Parser(query) + assert parser.tables == ["table1"] + assert parser.columns == ["col1 with space", "col2_anything"] + assert parser.columns_dict == { + "select": ["col1 with space", "col2_anything"], + } + + +def test_separator_not_column(): + # solved: https://github.com/macbre/sql-metadata/issues/400 + query = """ + SELECT JoinedMonth, + group_concat( + distinct FirstName + order by FirstName + separator '/') as FirstName + FROM customers + GROUP BY JoinedMonth + """ + parser = Parser(query) + assert parser.columns == ["JoinedMonth", "FirstName"] + columns_lower = [c.lower() for c in parser.columns] + assert "separator" not in columns_lower + + +def test_mssql_top_columns(): + # solved: https://github.com/macbre/sql-metadata/issues/318 + query = "SELECT TOP 10 id, name FROM foo" + parser = Parser(query) + assert parser.tables == ["foo"] + assert parser.columns == ["id", "name"] + assert parser.columns_dict == {"select": ["id", "name"]} + + +def test_columns_regex_fallback_on_invalid_insert(): + """Invalid INSERT falls back to regex for column extraction.""" + p = Parser("INSERT INTO t (col1, col2, col3) GARBAGE GARBAGE GARBAGE") + 
assert p.columns == ["col1", "col2", "col3"] + + +def test_columns_via_regex_on_completely_invalid_sql(): + """Totally invalid SQL with INTO...(cols) pattern uses regex fallback.""" + p = Parser("INTO tbl (col_a, col_b) FROM TO WHERE") + assert p.columns == ["col_a", "col_b"] + + +def test_cte_with_more_column_aliases_than_body(): + """CTE defines more column names than the body SELECT produces.""" + p = Parser( + "WITH cte(a, b, c) AS (SELECT x FROM t) " + "SELECT a FROM cte" + ) + assert "a" in p.columns_aliases_names + + +def test_cte_with_table_star_in_body(): + """CTE body uses table.* — exercises _flat_columns with table-qualified star.""" + p = Parser( + "WITH cte(a) AS (SELECT t.* FROM t) " + "SELECT a FROM cte" + ) + assert "t.*" in p.columns or "a" in p.columns_aliases_names diff --git a/test/test_getting_tables.py b/test/test_getting_tables.py index d6617037..d50f362b 100644 --- a/test/test_getting_tables.py +++ b/test/test_getting_tables.py @@ -286,11 +286,16 @@ def test_table_name_with_group_by(): == expected_tables ) - assert Parser(""" + assert ( + Parser( + """ SELECT s.cust_id,count(s.cust_id) FROM SH.sales s GROUP BY s.cust_id HAVING s.cust_id != '1660' AND s.cust_id != '2' - """.strip()).tables == expected_tables + """.strip() + ).tables + == expected_tables + ) def test_datasets(): @@ -777,3 +782,174 @@ def test_subquery_followed_by_tables(): "customer_address", "customer", ] + + +def test_joined_on_datetrunc(): + # solved: https://github.com/macbre/sql-metadata/issues/555 + query = """SELECT * + FROM test t + join test_1 t1 + on datetrunc('day', t.test_date) = datetrunc('day', t1.test_date)""" + parser = Parser(query) + assert parser.tables == ["test", "test_1"] + assert parser.columns == ["*", "test.test_date", "test_1.test_date"] + + +def test_ifnull_in_on_clause(): + # solved: https://github.com/macbre/sql-metadata/issues/534 + query = ( + "SELECT * FROM table1 a " + "LEFT JOIN table2 b ON ifnull(a.col1, '') = ifnull(b.col1, '')" + ) + 
parser = Parser(query) + assert parser.tables == ["table1", "table2"] + assert parser.columns == ["*", "table1.col1", "table2.col1"] + assert parser.tables_aliases == {"a": "table1", "b": "table2"} + assert parser.columns_dict == { + "select": ["*"], + "join": ["table1.col1", "table2.col1"], + } + + +def test_nvl_in_join_condition(): + # solved: https://github.com/macbre/sql-metadata/issues/446 + query = "SELECT 1 FROM t1 JOIN t2 ON t1.t2_id = nvl(t2.id, t2.uid)" + parser = Parser(query) + assert parser.tables == ["t1", "t2"] + assert parser.columns == ["t1.t2_id", "t2.id", "t2.uid"] + assert parser.columns_dict == {"join": ["t1.t2_id", "t2.id", "t2.uid"]} + + +def test_where_not_table_alias(): + # solved: https://github.com/macbre/sql-metadata/issues/451 + parser = Parser("SELECT name FROM employee WHERE age > 25") + assert parser.tables == ["employee"] + assert parser.columns == ["name", "age"] + assert parser.tables_aliases == {} + assert parser.columns_dict == {"select": ["name"], "where": ["age"]} + + +def test_column_not_in_tables_with_not_in(): + # solved: https://github.com/macbre/sql-metadata/issues/457 + query = """ + SELECT * + FROM TABLE1 + WHERE + SNAPSHOTDATE = (SELECT MAX(SNAPSHOTDATE) FROM TABLE1) + AND (MTYPE NOT IN ('Item1', 'Item2')) + """ + parser = Parser(query) + assert parser.tables == ["TABLE1"] + assert parser.columns == ["*", "SNAPSHOTDATE", "MTYPE"] + assert parser.columns_dict == { + "select": ["*", "SNAPSHOTDATE"], + "where": ["SNAPSHOTDATE", "MTYPE"], + } + + +def test_update_alias_not_extra_table(): + # solved: https://github.com/macbre/sql-metadata/issues/370 + query = "UPDATE a SET b=1 FROM schema1.testtable AS a" + parser = Parser(query) + assert "schema1.testtable" in parser.tables + assert parser.tables_aliases == {"a": "schema1.testtable"} + assert parser.columns == ["b"] + + +def test_select_into_vars_not_tables(): + # solved: https://github.com/macbre/sql-metadata/issues/397 + query = "SELECT C1, C2 INTO VAR1, VAR2 FROM 
TEST_TABLE" + parser = Parser(query) + assert parser.tables == ["TEST_TABLE"] + assert parser.columns == ["C1", "C2"] + assert parser.columns_dict == {"select": ["C1", "C2"]} + + +def test_presto_unnest_not_table(): + # solved: https://github.com/macbre/sql-metadata/issues/284 + query = """ + SELECT col_ + FROM my_table + CROSS JOIN UNNEST(my_col) AS t(col_) + """ + parser = Parser(query) + assert parser.tables == ["my_table"] + assert "col_" in parser.columns + + +def test_bigquery_unnest_not_table(): + # Solved: https://github.com/macbre/sql-metadata/issues/352 + p = Parser( + "SELECT A, B, metrics.C, metrics.D " + "FROM table1, UNNEST(metrics) as metrics" + ) + assert p.tables == ["table1"] + assert "metrics" in p.columns + + +def test_from_order_does_not_affect_tables(): + # solved: https://github.com/macbre/sql-metadata/issues/335 + query1 = "SELECT aa FROM (SELECT bb FROM bbb GROUP BY bb) AS a, omg" + query2 = "SELECT aa FROM omg, (SELECT bb FROM bbb GROUP BY bb) AS a" + parser1 = Parser(query1) + parser2 = Parser(query2) + assert set(parser1.tables) == {"bbb", "omg"} + assert set(parser2.tables) == {"bbb", "omg"} + assert set(parser1.columns) == {"aa", "bb"} + assert set(parser2.columns) == {"aa", "bb"} + + +def test_complex_subquery_join_tables(): + # solved: https://github.com/macbre/sql-metadata/issues/324 + query = """ + SELECT * FROM + ( (SELECT a1, a2 FROM ta1) tt1 + LEFT JOIN + (SELECT b1, b2 FROM tb1) tt2 + ON tt1.a1 = tt2.b1) tt3 + """ + parser = Parser(query) + assert parser.tables == ["ta1", "tb1"] + assert parser.columns == ["*", "a1", "a2", "b1", "b2"] + + +def test_on_keyword_not_table_alias(): + # solved: https://github.com/macbre/sql-metadata/issues/537 + parser = Parser( + """ + WITH + database1.tableFromWith AS (SELECT aa.* FROM table3 as aa + left join table4 on aa.col1=table4.col2), + test as (SELECT * from table3) + SELECT "xxxxx" + FROM database1.tableFromWith alias + LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx") + """ + 
) + assert parser.tables == ["table3", "table4", "database2.table2"] + assert "on" not in parser.tables_aliases + assert "ON" not in parser.tables_aliases + assert parser.tables_aliases == {"aa": "table3"} + + +def test_unmatched_parentheses_graceful(): + # solved: https://github.com/macbre/sql-metadata/issues/532 + # Should not raise IndexError; graceful handling of malformed SQL + try: + parser = Parser("SELECT arrayJoin(tags.key)) FROM foo") + _ = parser.tables + except (ValueError, Exception): + pass + + +def test_degraded_parse_falls_through_to_last_dialect(): + """SELECT UNIQUE triggers multi-dialect retry.""" + p = Parser("SELECT UNIQUE col FROM t") + assert "t" in p.tables + + +def test_parenthesized_select_unwrapping(): + """Parenthesized top-level SELECT is correctly unwrapped.""" + p = Parser("(SELECT a, b FROM t)") + assert p.tables == ["t"] + assert p.columns == ["a", "b"] diff --git a/test/test_hive.py b/test/test_hive.py index 7dd00b49..b532b35d 100644 --- a/test/test_hive.py +++ b/test/test_hive.py @@ -46,3 +46,113 @@ def test_complex_hive_query(): "rollup_wiki_beacon_pageviews", "statsdb.dimension_wikis", ] == Parser(dag).tables + + +def test_hive_alter_table_drop_partition(): + # solved: https://github.com/macbre/sql-metadata/issues/495 + query = "ALTER TABLE table_name DROP IF EXISTS PARTITION (dt = 20240524)" + parser = Parser(query) + assert parser.tables == ["table_name"] + assert "PARTITION" not in parser.tables + assert "dt" not in parser.tables + + +def test_hive_insert_overwrite_with_partition(): + # solved: https://github.com/macbre/sql-metadata/issues/502 + query = """ + INSERT OVERWRITE TABLE tbl PARTITION (dt='20240101') + SELECT col1, col2 FROM table1 + JOIN table2 ON table1.id = table2.id + """ + parser = Parser(query) + assert parser.tables == ["tbl", "table1", "table2"] + assert "dt" not in parser.tables + assert parser.columns == ["col1", "col2", "table1.id", "table2.id"] + assert parser.columns_dict == { + "select": ["col1", 
def test_lateral_view_not_in_tables():
    # Solved: https://github.com/macbre/sql-metadata/issues/369
    """A LATERAL VIEW alias must not be reported as a table."""
    sql = """SELECT event_day, action_type
    FROM t
    LATERAL VIEW EXPLODE(ARRAY(1, 2)) lv AS action_type"""
    p = Parser(sql)
    assert p.tables == ["t"]
    assert p.columns == ["event_day", "action_type"]


def test_array_subscript_with_lateral_view():
    # Solved: https://github.com/macbre/sql-metadata/issues/369
    """Array subscript ``[n]`` must not trigger the MSSQL bracketed dialect."""
    sql = """SELECT max(split(fourth_category, '~')[2]) AS ch_4th_class
    FROM t
    LATERAL VIEW EXPLODE(ARRAY(1, 2)) lv AS action_type"""
    p = Parser(sql)
    assert p.tables == ["t"]
def test_limit_and_offset_on_update():
    """An UPDATE statement carries no LIMIT — the property is None."""
    result = Parser("UPDATE t SET col = 1 WHERE id = 5").limit_and_offset
    assert result is None


def test_limit_and_offset_on_insert():
    """An INSERT statement carries no LIMIT — the property is None."""
    result = Parser("INSERT INTO t (a) VALUES (1)").limit_and_offset
    assert result is None


def test_limit_with_parameter_placeholder():
    """A non-numeric LIMIT placeholder cannot be converted to int — None."""
    result = Parser("SELECT col FROM t LIMIT :limit").limit_and_offset
    assert result is None
def test_limit_regex_standard_via_subquery():
    """Regex fallback picks up the standard LIMIT inside the subquery."""
    parsed = Parser(
        "SELECT * FROM (SELECT id FROM t LIMIT 30) AS sub FETCH FIRST 5 ROWS ONLY"
    )
    assert parsed.limit_and_offset == (30, 0)


def test_limit_regex_with_offset_via_subquery():
    """Regex fallback finds LIMIT/OFFSET when the outer query is unparseable."""
    parsed = Parser(
        "SELECT * FROM (SELECT id FROM t LIMIT 50 OFFSET 100) AS sub LIMIT ALL"
    )
    assert parsed.limit_and_offset == (50, 100)


def test_limit_and_offset_comment_only():
    """Comment-only SQL has no AST, so LIMIT/OFFSET resolves to None."""
    parsed = Parser("/* just a comment */")
    assert parsed.limit_and_offset is None


def test_mssql_catalog_double_dot():
    """SQL Server three-part name with an empty db part: catalog..table."""
    parsed = Parser("SELECT * FROM mydb..orders")
    assert "mydb..orders" in parsed.tables
+++ b/test/test_multiple_subqueries.py @@ -81,6 +81,7 @@ def test_multiple_subqueries(): assert parser.subqueries_names == [ "jrah2", "main_qry", + "subquery_1", "days_sqry", "days_final_qry", "subdays", @@ -135,87 +136,92 @@ def test_multiple_subqueries(): "presentation.job_request_id", ] assert parser.subqueries == { - "days_final_qry": "SELECT PROJECT_ID, days_to_offer, (SELECT count(distinct " - "jro.job_request_application_id) from job_request_offer jro " - "left join job_request_application jra2 on " - "jro.job_request_application_id = jra2.id where " - "jra2.job_request_id = PROJECT_ID and " - "jro.first_presented_date is not null and " - "jro.first_presented_date <= InitialChangeDate) as RowNo " - "from (SELECT jr.id as PROJECT_ID, 5 * " + "days_final_qry": "SELECT PROJECT_ID, days_to_offer, (SELECT COUNT(DISTINCT " + "jro.job_request_application_id) FROM job_request_offer AS jro " + "LEFT JOIN job_request_application AS jra2 ON " + "jro.job_request_application_id = jra2.id WHERE " + "jra2.job_request_id = PROJECT_ID AND " + "jro.first_presented_date IS NOT NULL AND " + "jro.first_presented_date <= InitialChangeDate) AS RowNo " + "FROM (SELECT jr.id AS PROJECT_ID, 5 * " "(DATEDIFF(jro.first_presented_date, jr.creation_date) DIV " "7) + " "MID('0123444401233334012222340111123400001234000123440', 7 " "* WEEKDAY(jr.creation_date) + " - "WEEKDAY(jro.first_presented_date) + 1, 1) as " + "WEEKDAY(jro.first_presented_date) + 1, 1) AS " "days_to_offer, jro.job_request_application_id, " - "jro.first_presented_date as InitialChangeDate from " - "presentation pr left join presentation_job_request_offer " - "pjro on pr.id = pjro.presentation_id left join " - "job_request_offer jro on pjro.job_request_offer_id = " - "jro.id left join job_request jr on pr.job_request_id = " - "jr.id where jro.first_presented_date is not null) " + "jro.first_presented_date AS InitialChangeDate FROM " + "presentation AS pr LEFT JOIN presentation_job_request_offer " + "AS pjro ON pr.id = 
pjro.presentation_id LEFT JOIN " + "job_request_offer AS jro ON pjro.job_request_offer_id = " + "jro.id LEFT JOIN job_request AS jr ON pr.job_request_id = " + "jr.id WHERE jro.first_presented_date IS NOT NULL) AS " "days_sqry", - "days_sqry": "SELECT jr.id as PROJECT_ID, 5 * " + "days_sqry": "SELECT jr.id AS PROJECT_ID, 5 * " "(DATEDIFF(jro.first_presented_date, jr.creation_date) DIV 7) + " "MID('0123444401233334012222340111123400001234000123440', 7 * " "WEEKDAY(jr.creation_date) + WEEKDAY(jro.first_presented_date) + " - "1, 1) as days_to_offer, jro.job_request_application_id, " - "jro.first_presented_date as InitialChangeDate from presentation " - "pr left join presentation_job_request_offer pjro on pr.id = " - "pjro.presentation_id left join job_request_offer jro on " - "pjro.job_request_offer_id = jro.id left join job_request jr on " - "pr.job_request_id = jr.id where jro.first_presented_date is not " - "null", - "jrah2": "SELECT jro2.job_request_application_id, max(case when " - "jro2.first_interview_scheduled_date is not null then 1 else 0 end) " - "as IS_INTERVIEW, max(case when jro2.first_presented_date is not " - "null then 1 else 0 end) as IS_PRESENTATION from job_request_offer " - "jro2 group by 1", - "main_qry": "SELECT jr.id as PROJECT_ID, 5 * " - "(DATEDIFF(ifnull(lc.creation_date, now()), jr.creation_date) DIV " + "1, 1) AS days_to_offer, jro.job_request_application_id, " + "jro.first_presented_date AS InitialChangeDate FROM presentation " + "AS pr LEFT JOIN presentation_job_request_offer AS pjro ON pr.id = " + "pjro.presentation_id LEFT JOIN job_request_offer AS jro ON " + "pjro.job_request_offer_id = jro.id LEFT JOIN job_request AS jr ON " + "pr.job_request_id = jr.id WHERE jro.first_presented_date IS NOT " + "NULL", + "jrah2": "SELECT jro2.job_request_application_id, MAX(CASE WHEN " + "jro2.first_interview_scheduled_date IS NOT NULL THEN 1 ELSE 0 END) " + "AS IS_INTERVIEW, MAX(CASE WHEN jro2.first_presented_date IS NOT " + "NULL THEN 1 ELSE 0 END) 
AS IS_PRESENTATION FROM job_request_offer " + "AS jro2 GROUP BY 1", + "main_qry": "SELECT jr.id AS PROJECT_ID, 5 * " + "(DATEDIFF(IFNULL(lc.creation_date, NOW()), jr.creation_date) DIV " "7) + MID('0123444401233334012222340111123400001234000123440', 7 " - "* WEEKDAY(jr.creation_date) + WEEKDAY(ifnull(lc.creation_date, " - "now())) + 1, 1) as LIFETIME, count(distinct case when " - "jra.application_source = 'VERAMA' then jra.id else null end) " - "NUM_APPLICATIONS, count(distinct jra.id) NUM_CANDIDATES, " - "sum(case when jro.stage = 'DEAL' then 1 else 0 end) as " - "NUM_CONTRACTED, sum(ifnull(IS_INTERVIEW, 0)) as NUM_INTERVIEWED, " - "sum(ifnull(IS_PRESENTATION, 0)) as NUM_OFFERED from job_request " - "jr left join job_request_application jra on jr.id = " - "jra.job_request_id left join job_request_offer jro on " - "jro.job_request_application_id = jra.id left join lifecycle lc " - "on lc.object_id = jr.id and lc.lifecycle_object_type = " - "'JOB_REQUEST' and lc.event = 'JOB_REQUEST_CLOSED' left join " - "(SELECT jro2.job_request_application_id, max(case when " - "jro2.first_interview_scheduled_date is not null then 1 else 0 " - "end) as IS_INTERVIEW, max(case when jro2.first_presented_date is " - "not null then 1 else 0 end) as IS_PRESENTATION from " - "job_request_offer jro2 group by 1) jrah2 on jra.id = " - "jrah2.job_request_application_id left join client u on " - "jr.client_id = u.id where jr.from_point_break = 0 and u.name not " - "in ('Test', 'Demo Client') group by 1, 2", - "subdays": "SELECT PROJECT_ID, sum(case when RowNo = 1 then days_to_offer " - "else null end) as DAYS_OFFER1, sum(case when RowNo = 2 then " - "days_to_offer else null end) as DAYS_OFFER2, sum(case when RowNo " - "= 3 then days_to_offer else null end) as DAYS_OFFER3 from (SELECT " - "PROJECT_ID, days_to_offer, (SELECT count(distinct " - "jro.job_request_application_id) from job_request_offer jro left " - "join job_request_application jra2 on " - "jro.job_request_application_id = jra2.id 
where " - "jra2.job_request_id = PROJECT_ID and jro.first_presented_date is " - "not null and jro.first_presented_date <= InitialChangeDate) as " - "RowNo from (SELECT jr.id as PROJECT_ID, 5 * " + "* WEEKDAY(jr.creation_date) + WEEKDAY(IFNULL(lc.creation_date, " + "NOW())) + 1, 1) AS LIFETIME, COUNT(DISTINCT CASE WHEN " + "jra.application_source = 'VERAMA' THEN jra.id ELSE NULL END) " + "AS NUM_APPLICATIONS, COUNT(DISTINCT jra.id) AS NUM_CANDIDATES, " + "SUM(CASE WHEN jro.stage = 'DEAL' THEN 1 ELSE 0 END) AS " + "NUM_CONTRACTED, SUM(IFNULL(IS_INTERVIEW, 0)) AS NUM_INTERVIEWED, " + "SUM(IFNULL(IS_PRESENTATION, 0)) AS NUM_OFFERED FROM job_request " + "AS jr LEFT JOIN job_request_application AS jra ON jr.id = " + "jra.job_request_id LEFT JOIN job_request_offer AS jro ON " + "jro.job_request_application_id = jra.id LEFT JOIN lifecycle AS lc " + "ON lc.object_id = jr.id AND lc.lifecycle_object_type = " + "'JOB_REQUEST' AND lc.event = 'JOB_REQUEST_CLOSED' LEFT JOIN " + "(SELECT jro2.job_request_application_id, MAX(CASE WHEN " + "jro2.first_interview_scheduled_date IS NOT NULL THEN 1 ELSE 0 " + "END) AS IS_INTERVIEW, MAX(CASE WHEN jro2.first_presented_date IS " + "NOT NULL THEN 1 ELSE 0 END) AS IS_PRESENTATION FROM " + "job_request_offer AS jro2 GROUP BY 1) AS jrah2 ON jra.id = " + "jrah2.job_request_application_id LEFT JOIN client AS u ON " + "jr.client_id = u.id WHERE jr.from_point_break = 0 AND u.name NOT " + "IN ('Test', 'Demo Client') GROUP BY 1, 2", + "subquery_1": "SELECT COUNT(DISTINCT jro.job_request_application_id) FROM " + "job_request_offer AS jro LEFT JOIN job_request_application AS jra2 ON " + "jro.job_request_application_id = jra2.id WHERE jra2.job_request_id = " + "PROJECT_ID AND jro.first_presented_date IS NOT NULL AND " + "jro.first_presented_date <= InitialChangeDate", + "subdays": "SELECT PROJECT_ID, SUM(CASE WHEN RowNo = 1 THEN days_to_offer " + "ELSE NULL END) AS DAYS_OFFER1, SUM(CASE WHEN RowNo = 2 THEN " + "days_to_offer ELSE NULL END) AS 
DAYS_OFFER2, SUM(CASE WHEN RowNo " + "= 3 THEN days_to_offer ELSE NULL END) AS DAYS_OFFER3 FROM (SELECT " + "PROJECT_ID, days_to_offer, (SELECT COUNT(DISTINCT " + "jro.job_request_application_id) FROM job_request_offer AS jro LEFT " + "JOIN job_request_application AS jra2 ON " + "jro.job_request_application_id = jra2.id WHERE " + "jra2.job_request_id = PROJECT_ID AND jro.first_presented_date IS " + "NOT NULL AND jro.first_presented_date <= InitialChangeDate) AS " + "RowNo FROM (SELECT jr.id AS PROJECT_ID, 5 * " "(DATEDIFF(jro.first_presented_date, jr.creation_date) DIV 7) + " "MID('0123444401233334012222340111123400001234000123440', 7 * " "WEEKDAY(jr.creation_date) + WEEKDAY(jro.first_presented_date) + " - "1, 1) as days_to_offer, jro.job_request_application_id, " - "jro.first_presented_date as InitialChangeDate from presentation " - "pr left join presentation_job_request_offer pjro on pr.id = " - "pjro.presentation_id left join job_request_offer jro on " - "pjro.job_request_offer_id = jro.id left join job_request jr on " - "pr.job_request_id = jr.id where jro.first_presented_date is not " - "null) days_sqry) days_final_qry group by PROJECT_ID", + "1, 1) AS days_to_offer, jro.job_request_application_id, " + "jro.first_presented_date AS InitialChangeDate FROM presentation " + "AS pr LEFT JOIN presentation_job_request_offer AS pjro ON pr.id = " + "pjro.presentation_id LEFT JOIN job_request_offer AS jro ON " + "pjro.job_request_offer_id = jro.id LEFT JOIN job_request AS jr ON " + "pr.job_request_id = jr.id WHERE jro.first_presented_date IS NOT " + "NULL) AS days_sqry) AS days_final_qry GROUP BY PROJECT_ID", } @@ -259,9 +265,9 @@ def test_multiline_queries(): } assert parser.subqueries == { - "a": "SELECT std.task_id as new_task_id " - "FROM some_task_detail std WHERE std.STATUS = 1", - "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80", + "a": "SELECT std.task_id AS new_task_id " + "FROM some_task_detail AS std WHERE std.STATUS = 1", + "b": "SELECT 
st.task_id FROM some_task AS st WHERE task_type_id = 80", } parser2 = Parser(parser.subqueries["a"]) @@ -417,8 +423,8 @@ def test_readme_query(): ON a.task_id = b.task_id; """) assert parser.subqueries == { - "a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1", - "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80", + "a": "SELECT std.task_id FROM some_task_detail AS std WHERE std.STATUS = 1", + "b": "SELECT st.task_id FROM some_task AS st WHERE task_type_id = 80", } assert parser.subqueries_names == ["a", "b"] assert parser.columns == [ @@ -432,3 +438,173 @@ def test_readme_query(): "select": ["some_task_detail.task_id", "some_task.task_id"], "where": ["some_task_detail.STATUS", "task_type_id"], } + + +def test_subquery_extraction_with_case(): + # solved: https://github.com/macbre/sql-metadata/issues/469 + query = """ + SELECT o_year, + sum(case when nation = 'KENYA' then volume else 0 end) + / sum(volume) as mkt_share + FROM ( + SELECT extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + FROM part, supplier, lineitem, orders, customer, + nation n1, nation n2, region + WHERE p_partkey = l_partkey + AND s_suppkey = l_suppkey + AND l_orderkey = o_orderkey + AND o_custkey = c_custkey + AND c_nationkey = n1.n_nationkey + AND n1.n_regionkey = r_regionkey + AND r_name = 'AFRICA' + AND s_nationkey = n2.n_nationkey + AND o_orderdate BETWEEN date '1995-01-01' AND date '1996-12-31' + AND p_type = 'PROMO POLISHED NICKEL' + ) as all_nations + GROUP BY o_year + ORDER BY o_year + """ + parser = Parser(query) + assert "part" in parser.tables + assert "supplier" in parser.tables + assert "lineitem" in parser.tables + assert "orders" in parser.tables + assert "customer" in parser.tables + assert "nation" in parser.tables + assert "region" in parser.tables + assert "o_orderdate" in parser.columns + + +def test_column_alias_same_as_subquery_alias(): + # solved: 
def test_subquery_in_select_closing_parens():
    # solved: https://github.com/macbre/sql-metadata/issues/447
    """A scalar subquery in the SELECT list is resolved with its own alias."""
    query = """
    SELECT a.pt_no, b.pt_name,
    (SELECT dept_name FROM depart d WHERE a.dept_cd = d.dept_cd),
    a.c_no, a.cls
    FROM clinmt a, tbamv b
    """
    p = Parser(query)
    assert p.tables == ["depart", "clinmt", "tbamv"]
    assert p.tables_aliases == {"a": "clinmt", "b": "tbamv", "d": "depart"}
    for column in (
        "clinmt.pt_no",
        "tbamv.pt_name",
        "dept_name",
        "clinmt.c_no",
        "clinmt.cls",
    ):
        assert column in p.columns


def test_subquery_alias_with_inner_column():
    """Alias wrapping a scalar subquery that returns a plain column."""
    parsed = Parser("SELECT (SELECT col FROM t LIMIT 1) AS x FROM s")
    assert "x" in parsed.columns_aliases_names


def test_subquery_alias_with_inner_star():
    """Alias wrapping a scalar subquery that uses SELECT *."""
    parsed = Parser("SELECT (SELECT * FROM t LIMIT 1) AS x FROM s")
    assert "x" in parsed.columns_aliases_names


def test_subquery_alias_with_inner_alias():
    """Alias wrapping a scalar subquery that itself returns an alias."""
    parsed = Parser("SELECT (SELECT col AS c FROM t LIMIT 1) AS x FROM s")
    assert "x" in parsed.columns_aliases_names
def test_subquery_alias_with_aggregate_column():
    # Related to https://github.com/macbre/sql-metadata/issues/528
    """MAX(col) resolves the alias to a real column, unlike COUNT(*)."""
    parsed = Parser(
        "SELECT ap.[AccountId], "
        "(SELECT MAX(t.[Id]) FROM [Transactions] t "
        "WHERE t.[AccountId] = ap.[AccountId]) AS MaxTransactionId "
        "FROM [AccountProfiles] ap"
    )
    assert parsed.tables == ["[Transactions]", "[AccountProfiles]"]
    assert parsed.columns == ["ap.AccountId", "t.Id", "t.AccountId"]
    expected_columns_dict = {
        "select": ["ap.AccountId", "t.Id"],
        "where": ["t.AccountId", "ap.AccountId"],
    }
    assert parsed.columns_dict == expected_columns_dict
    assert parsed.columns_aliases == {"MaxTransactionId": "t.Id"}


def test_unaliased_subquery():
    # Solved: https://github.com/macbre/sql-metadata/issues/365
    """An unaliased subquery receives a synthetic ``subquery_N`` name."""
    query = """SELECT * FROM customers
    WHERE id IN (
    SELECT customer_id FROM reservations
    WHERE year(reservation_date) = year(now())
    GROUP BY customer_id
    ORDER BY count(*) DESC LIMIT 1
    )"""
    parsed = Parser(query)
    assert parsed.tables == ["customers", "reservations"]
    assert parsed.subqueries_names == ["subquery_1"]
    assert "subquery_1" in parsed.subqueries


def test_multiple_unaliased_subqueries():
    """Each unaliased subquery gets its own sequential synthetic name."""
    parsed = Parser(
        "SELECT * FROM t WHERE a IN (SELECT id FROM t2) AND b IN (SELECT id FROM t3)"
    )
    assert parsed.subqueries_names == ["subquery_1", "subquery_2"]
    for name in ("subquery_1", "subquery_2"):
        assert name in parsed.subqueries


def test_mixed_aliased_and_unaliased_subqueries():
    """Explicit aliases and synthetic names coexist in subqueries."""
    parsed = Parser(
        "SELECT * FROM (SELECT id FROM t2) sub WHERE a IN (SELECT id FROM t3)"
    )
    for name in ("sub", "subquery_1"):
        assert name in parsed.subqueries_names
        assert name in parsed.subqueries
def test_subquery_bodies_empty_when_no_subquery():
    """A query without subqueries yields an empty subqueries dict."""
    parsed = Parser("SELECT * FROM t")
    assert parsed.subqueries == {}


def test_subquery_names_empty_when_no_subquery():
    """A query without subqueries yields an empty subqueries_names list."""
    parsed = Parser("SELECT * FROM t")
    assert parsed.subqueries_names == []


def test_empty_query_property():
    """The ``query`` property of an empty-string parser is the empty string."""
    parsed = Parser("")
    assert parsed.query == ""


def test_tokens_caching():
    """Repeated ``tokens`` access returns the exact same cached list object."""
    parsed = Parser("SELECT col FROM t")
    assert parsed.tokens is parsed.tokens
def test_merge_into_query_type():
    # solved: https://github.com/macbre/sql-metadata/issues/354
    """MERGE INTO is recognized as its own query type with correct metadata."""
    query = """
    MERGE INTO wines w
    USING (VALUES('Chateau Lafite 2003', '24')) v
    ON v.column1 = w.winename
    WHEN NOT MATCHED THEN INSERT VALUES(v.column1, v.column2)
    WHEN MATCHED THEN UPDATE SET stock = stock + v.column2
    """
    p = Parser(query)
    assert p.query_type == QueryType.MERGE
    assert p.tables == ["wines"]
    expected_columns = [
        "v.column1",
        "wines.winename",
        "v.column2",
        "stock",
    ]
    assert p.columns == expected_columns
    assert p.tables_aliases == {"w": "wines"}


def test_create_temporary_table():
    # solved: https://github.com/macbre/sql-metadata/issues/439
    """CREATE TEMPORARY TABLE ... AS SELECT reports both tables involved."""
    p = Parser("CREATE TEMPORARY TABLE tablname AS SELECT * FROM source_table")
    assert p.query_type == QueryType.CREATE
    for table in ("tablname", "source_table"):
        assert table in p.tables
    assert p.columns == ["*"]
def test_unrecognized_command_type():
    """A query parsed as a generic Command that isn't ALTER/CREATE is rejected
    with ``InvalidQueryDefinition``."""
    with pytest.raises(InvalidQueryDefinition, match="Not supported query type"):
        Parser("SHOW TABLES").query_type


def test_deeply_parenthesized_query():
    """A triple-parenthesized SELECT is fully unwrapped and parsed."""
    p = Parser("(((SELECT col FROM t)))")
    assert p.query_type == "SELECT"
    assert p.tables == ["t"]
    assert p.columns == ["col"]


def test_execute_command_not_supported():
    """EXECUTE parses as a generic Command but isn't a known statement type —
    the parser raises ``InvalidQueryDefinition`` (not a plain ValueError)."""
    with pytest.raises(InvalidQueryDefinition, match="Not supported query type"):
        Parser("EXECUTE sp_help").query_type
def test_values_on_invalid_sql():
    """Unparseable SQL yields an empty values list instead of raising."""
    from sql_metadata import Parser

    parsed = Parser(";;;")
    assert parsed.values == []


def test_values_on_comment_only_sql():
    """Comment-only SQL yields an empty values list."""
    from sql_metadata import Parser

    parsed = Parser("/* just a comment */")
    assert parsed.values == []


def test_negative_integer_values():
    """A negative integer in VALUES is extracted as an int."""
    parsed = Parser("INSERT INTO scores (player, points) VALUES ('alice', -42)")
    assert parsed.values == ["alice", -42]
    assert parsed.values_dict == {"player": "alice", "points": -42}


def test_negative_float_values():
    """A negative float in VALUES is extracted as a float."""
    parsed = Parser("INSERT INTO measurements (sensor, reading) VALUES ('temp', -3.14)")
    assert parsed.values == ["temp", -3.14]
    assert parsed.values_dict == {"sensor": "temp", "reading": -3.14}


def test_insert_with_null_value():
    """NULL in VALUES falls back to its string representation."""
    parsed = Parser("INSERT INTO t (a, b) VALUES (1, NULL)")
    assert parsed.values == [1, "NULL"]
    assert parsed.values_dict == {"a": 1, "b": "NULL"}


def test_insert_with_scalar_subquery_in_values():
    """Columns referenced by a scalar subquery inside VALUES are extracted."""
    parsed = Parser(
        "INSERT INTO orders (customer_id) "
        "VALUES ((SELECT id FROM customers WHERE email = 'foo@bar.com'))"
    )
    assert parsed.tables == ["orders", "customers"]
    assert parsed.columns == ["customer_id", "id", "email"]


def test_insert_multi_row_values():
    # Solved: https://github.com/macbre/sql-metadata/issues/558
    """Multi-row VALUES produce per-row lists and column-grouped dicts."""
    parsed = Parser("INSERT INTO t (field1, field2) VALUES (1, 2), (3, 4)")
    assert parsed.values == [[1, 2], [3, 4]]
    assert parsed.values_dict == {"field1": [1, 3], "field2": [2, 4]}
(CURRENT_TIMESTAMP)") + assert len(p.values) == 1 diff --git a/test/test_with_statements.py b/test/test_with_statements.py index 07805d0c..51b4a88e 100644 --- a/test/test_with_statements.py +++ b/test/test_with_statements.py @@ -1,6 +1,6 @@ import pytest -from sql_metadata import Parser +from sql_metadata import InvalidQueryDefinition, Parser from sql_metadata.keywords_lists import QueryType @@ -19,9 +19,9 @@ def test_with_statements(): assert parser.tables == ["table3", "table4", "database2.table2"] assert parser.with_names == ["database1.tableFromWith", "test"] assert parser.with_queries == { - "database1.tableFromWith": "SELECT aa.* FROM table3 as aa left join table4 on " + "database1.tableFromWith": "SELECT aa.* FROM table3 AS aa LEFT JOIN table4 ON " "aa.col1 = table4.col2", - "test": "SELECT * from table3", + "test": "SELECT * FROM table3", } parser = Parser(""" WITH @@ -143,18 +143,17 @@ def test_complicated_with(): assert parser.query_type == QueryType.SELECT assert parser.with_names == ["uisd_filter_table"] assert parser.with_queries == { - "uisd_filter_table": "select session_id, srch_id, srch_ci, srch_co, srch_los, " - "srch_sort_type, impr_list from uisd where datem <= " - "date_sub(date_add(current_date(), 92), 7 * 52) and " - "lower(srch_sort_type) in ('expertpicks', 'recommended') " - "and srch_ci <= date_sub(date_add(current_date(), 92), 7 " - "* 52) and srch_co >= date_sub(date_add(current_date(), " + "uisd_filter_table": "SELECT session_id, srch_id, srch_ci, srch_co, srch_los, " + "srch_sort_type, impr_list FROM uisd WHERE datem <= " + "DATE_SUB(DATE_ADD(CURRENT_DATE(), 92), 7 * 52) AND " + "LOWER(srch_sort_type) IN ('expertpicks', 'recommended') " + "AND srch_ci <= DATE_SUB(DATE_ADD(CURRENT_DATE(), 92), 7 " + "* 52) AND srch_co >= DATE_SUB(DATE_ADD(CURRENT_DATE(), " "1), 7 * 52)" } assert parser.tables == [ "uisd", - "impr_list", - ] # this one is wrong too should be table + ] assert parser.columns == [ "session_id", "srch_id", @@ -268,9 +267,9 
@@ def test_resolving_with_columns_with_nested_tables_prefixes(): parser = Parser(query) assert parser.with_names == ["query1", "query2"] assert parser.with_queries == { - "query1": "SELECT t5.c1, t5.c2, t6.c4 FROM t5 left join t6 on t5.link1 = " + "query1": "SELECT t5.c1, t5.c2, t6.c4 FROM t5 LEFT JOIN t6 ON t5.link1 = " "t6.link2", - "query2": "SELECT c3, c7 FROM t7 union all select c4, c12 from t8", + "query2": "SELECT c3, c7 FROM t7 UNION ALL SELECT c4, c12 FROM t8", } assert parser.tables == ["t5", "t6", "t7", "t8"] assert parser.columns_aliases == {} @@ -353,12 +352,12 @@ def test_nested_with_statement_in_create_table(): assert parser.with_names == ["sub", "abc"] assert parser.subqueries_names == ["table_a"] assert parser.with_queries == { - "abc": "select * from other_table", - "sub": "select it_id from internal_table", + "abc": "SELECT * FROM other_table", + "sub": "SELECT it_id FROM internal_table", } assert parser.subqueries == { - "table_a": "with abc as(select * from other_table) select name, age, it_id " - "from table_z join abc on (table_z.it_id = abc.it_id)" + "table_a": "WITH abc AS (SELECT * FROM other_table) SELECT name, age, it_id " + "FROM table_z JOIN abc ON (table_z.it_id = abc.it_id)" } assert parser.query_type == QueryType.CREATE @@ -444,7 +443,7 @@ def test_window_in_with(): assert parser.with_names == ["cte_1"] assert parser.columns == ["column_1", "column_2"] assert parser.with_queries == { - "cte_1": "SELECT column_1, column_2 FROM table_1 WINDOW window_1 AS(PARTITION BY column_2)" + "cte_1": "SELECT column_1, column_2 FROM table_1 WINDOW window_1 AS (PARTITION BY column_2)" } assert parser.tables == ["table_1"] @@ -500,7 +499,7 @@ def test_as_was_preceded_by_with_query(): SELECT 1; """ parser = Parser(query) - with pytest.raises(ValueError, match="This query is wrong"): + with pytest.raises(InvalidQueryDefinition): parser.tables query = """ @@ -509,7 +508,7 @@ def test_as_was_preceded_by_with_query(): SELECT 1; """ parser = 
Parser(query) - with pytest.raises(ValueError, match="This query is wrong"): + with pytest.raises(InvalidQueryDefinition): parser.tables query = """ @@ -518,7 +517,7 @@ def test_as_was_preceded_by_with_query(): SELECT 1; """ parser = Parser(query) - with pytest.raises(ValueError, match="This query is wrong"): + with pytest.raises(InvalidQueryDefinition): parser.tables @@ -530,5 +529,246 @@ def test_malformed_with_query_hang(): WHERE domain =e''$.f') AS g FROM h;""" parser = Parser(query) - with pytest.raises(ValueError, match="This query is wrong"): + with pytest.raises(InvalidQueryDefinition): parser.tables + + +def test_nested_cte_not_in_tables(): + # solved: https://github.com/macbre/sql-metadata/issues/314 + query = """ + WITH CTE_ROOT_1 as ( + WITH CTE_CHILD as ( + SELECT a FROM table_1 as t + ) + SELECT a FROM CTE_CHILD + ), + CTE_ROOT_2 as ( + SELECT b FROM table_2 + ) + SELECT a, b, c + FROM table_3 t3 + LEFT JOIN CTE_ROOT_1 cr1 on t3.id = cr1.id + LEFT JOIN CTE_ROOT_2 cr2 on t3.id = cr2.id + LEFT JOIN table_4 t4 on t3.id = t4.id + """ + parser = Parser(query) + assert parser.tables == ["table_1", "table_2", "table_3", "table_4"] + assert parser.columns == [ + "a", + "b", + "c", + "table_3.id", + "cr1.id", + "cr2.id", + "table_4.id", + ] + assert parser.tables_aliases == { + "t3": "table_3", + "t4": "table_4", + "t": "table_1", + } + + +def test_nested_with_name_not_table(): + # solved: https://github.com/macbre/sql-metadata/issues/413 + query = """ + WITH + A as ( + WITH intermediate_query as ( + SELECT id, some_column FROM table_one + ) + SELECT id, some_column FROM intermediate_query + ), + B as ( + SELECT id, other_column FROM table_two + ) + SELECT A.id, some_column, other_column + FROM A + INNER JOIN B ON A.id = B.id + """ + parser = Parser(query) + assert parser.tables == ["table_one", "table_two"] + assert parser.columns == ["id", "some_column", "other_column"] + + +def test_cte_alias_reuse(): + # solved: 
https://github.com/macbre/sql-metadata/issues/262 + query = """ + WITH + cte_one AS (SELECT cte_id, cte_name FROM cte_one_table), + cte_two AS (SELECT B.cte_id FROM cte_one B), + cte_three AS (SELECT B.id FROM (SELECT id FROM table_two) B) + SELECT * FROM cte_two + """ + parser = Parser(query) + assert parser.tables == ["cte_one_table", "table_two"] + assert "cte_id" in parser.columns + assert "cte_name" in parser.columns + + +def test_group_by_not_table_alias_in_cte(): + # solved: https://github.com/macbre/sql-metadata/issues/526 + query = """ + WITH [CTE1] AS ( + SELECT [Col1], MAX([Col2]) AS [MaxCol2] + FROM [Table1] + GROUP BY [Col1] + ) + SELECT t3.[Qty1], t4.[Code], t3.[DateCol] + FROM [Table1] t3 + JOIN [CTE1] t1 ON t3.[Col1] = t1.[Col1] AND t3.[DateCol] = t1.[MaxCol2] + JOIN [Table2] t4 ON t4.[ID] = t3.[Col2] + """ + parser = Parser(query) + aliases = parser.tables_aliases + assert "GROUP BY" not in aliases + assert "[Table1]" in parser.tables + assert "[Table2]" in parser.tables + + +def test_coalesce_three_args_in_cte(): + """COALESCE with 3+ args should render as COALESCE, not IFNULL.""" + p = Parser( + "WITH cte AS (SELECT COALESCE(a, b, c) FROM t) " + "SELECT * FROM cte" + ) + body = p.with_queries["cte"] + assert "COALESCE" in body.upper() + + +def test_date_add_in_cte(): + """DATE_ADD in a CTE body should be preserved by the custom generator.""" + p = Parser( + "WITH cte AS (SELECT DATE_ADD(created, INTERVAL 1 DAY) FROM events) " + "SELECT * FROM cte" + ) + body = p.with_queries["cte"] + assert "DATE_ADD" in body.upper() + + +def test_date_sub_in_cte(): + """DATE_SUB in a CTE body should be preserved by the custom generator.""" + p = Parser( + "WITH cte AS (SELECT DATE_SUB(created, INTERVAL 1 DAY) FROM events) " + "SELECT * FROM cte" + ) + body = p.with_queries["cte"] + assert "DATE_SUB" in body.upper() + + +def test_not_expression_in_cte(): + """NOT applied to a boolean expression (not IS NULL or IN) in CTE body.""" + p = Parser( + "WITH cte AS 
(SELECT * FROM t WHERE NOT (active > 0)) " + "SELECT * FROM cte" + ) + body = p.with_queries["cte"] + assert "NOT" in body.upper() + + +def test_nested_resolver_unresolvable_reference(): + """A dotted column reference not matching any CTE/subquery stays as-is.""" + p = Parser( + "WITH cte AS (SELECT id FROM t) " + "SELECT nonexistent.col FROM cte" + ) + assert "nonexistent.col" in p.columns + + +def test_cte_with_subquery_and_star_alias(): + # Solved: https://github.com/macbre/sql-metadata/issues/392 + p = Parser("""with x as (select d.nbr, d.af_pk + from test_db.test_table3 d) + select q.hx_id, q.text + from (select prod_code, s.* + from testdb.test_table s + inner join testdb.test_table2 p on s.s1_fk = p.p1_sk + ) q + inner join x on q.s2_fk = x.af_pk""") + assert p.tables == [ + "test_db.test_table3", "testdb.test_table", "testdb.test_table2" + ] + assert p.with_names == ["x"] + assert "testdb.test_table.*" in p.columns + + +def test_bracketed_select_with_cte_and_column_alias(): + # Solved: https://github.com/macbre/sql-metadata/issues/326 + p = Parser("""with a as (select id, a from tbl1), + with b as (select id, b from tbl2) + (select a.id, a.a + b.b as t + from a left join b on a.id = b.id)""") + assert p.tables == ["tbl1", "tbl2"] + assert p.with_names == ["a", "b"] + assert p.columns == ["id", "a", "b"] + + +def test_cte_without_alias_raises(): + """CTE without a name is invalid SQL.""" + with pytest.raises(InvalidQueryDefinition, match="All CTEs require an alias"): + Parser("WITH AS (SELECT 1) SELECT * FROM t").columns + + +def test_with_queries_empty_when_no_cte(): + """A query with no CTEs returns empty with_queries.""" + p = Parser("SELECT * FROM t") + assert p.with_queries == {} + + +def test_cte_subquery_full_resolution(): + """Subquery + CTE: CTE-qualified columns fully resolved.""" + parser = Parser(""" + WITH c AS (SELECT id, name FROM t1) + SELECT s.id, t2.name + FROM (SELECT c.id FROM c) AS s + JOIN t2 ON s.id = t2.id + """) + assert 
parser.tables == ["t1", "t2"] + assert "c.id" not in parser.columns + assert "id" in parser.columns + + +def test_chained_cte_qualified_columns_resolved(): + """CTE-qualified columns should resolve through chained CTEs.""" + # 2-level chain + p = Parser(""" + WITH c1 AS (SELECT a FROM t1), + c2 AS (SELECT c1.a FROM c1) + SELECT c2.a FROM c2 + """) + assert p.tables == ["t1"] + assert p.columns == ["a"] + + # 3-level chain + p = Parser(""" + WITH c1 AS (SELECT a FROM t1), + c2 AS (SELECT c1.a FROM c1), + c3 AS (SELECT c2.a FROM c2) + SELECT c3.a FROM c3 + """) + assert p.tables == ["t1"] + assert p.columns == ["a"] + + +def test_chained_cte_with_subquery(): + """CTE-qualified columns in subqueries wrapping chained CTEs.""" + p = Parser(""" + WITH c1 AS (SELECT a FROM t1), + c2 AS (SELECT c1.a FROM c1) + SELECT s.a FROM (SELECT c2.a FROM c2) AS s + """) + assert p.tables == ["t1"] + assert p.columns == ["a"] + + +def test_chained_cte_cross_reference(): + """4-level CTE chain where level 3 references both level 2 and level 1.""" + p = Parser(""" + WITH c1 AS (SELECT a, b FROM t1), + c2 AS (SELECT c1.a FROM c1), + c3 AS (SELECT c2.a, c1.b FROM c2 JOIN c1 ON c2.a = c1.a), + c4 AS (SELECT c3.a, c3.b FROM c3) + SELECT c4.a, c4.b FROM c4 + """) + assert p.tables == ["t1"] + assert p.columns == ["a", "b"]