Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added
### Fixed
- Fix parser error for CSV / Parquet table names containing special characters (#1066)

### Added
- Added JSON output format for test results (`--output-format json`)
- Added Azure AD / Entra ID authentication support for SQL Server (`ActiveDirectoryPassword`, `ActiveDirectoryServicePrincipal`, `ActiveDirectoryInteractive`)

Expand Down
4 changes: 2 additions & 2 deletions datacontract/engines/data_contract_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ def to_schema_checks(schema_object: SchemaObject, server: Server) -> List[Check]

type1 = server.type if server and server.type else None
config = QuotingConfig(
quote_field_name=type1 in ["postgres", "sqlserver", "azure", "snowflake"],
quote_field_name=type1 in ["postgres", "sqlserver", "snowflake", "azure", "s3", "gcs", "local"],
quote_field_name_with_backticks=type1 in ["databricks"],
quote_model_name=type1 in ["postgres", "sqlserver", "snowflake"],
quote_model_name=type1 in ["postgres", "sqlserver", "snowflake", "azure", "s3", "gcs", "local"],
quote_model_name_with_backticks=type1 == "bigquery",
)
quoting_config = config
Expand Down
11 changes: 6 additions & 5 deletions datacontract/engines/soda/connections/duckdb_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def get_duckdb_connection(
elif server.format == "delta":
con.sql("update extensions;") # Make sure we have the latest delta extension
con.sql(f"""CREATE VIEW "{model_name}" AS SELECT * FROM delta_scan('{model_path}');""")
table_info = con.sql(f"PRAGMA table_info('{model_name}');").fetchall()
table_info = con.sql(f'PRAGMA table_info("{model_name}");').fetchall()
if table_info:
run.log_info(f"DuckDB Table Info: {table_info}")
return con
Expand All @@ -99,12 +99,13 @@ def create_view_with_schema_union(con, schema_obj: SchemaObject, model_path: str
INTERSECT SELECT column_name
FROM information_schema.columns
WHERE table_name = '{model_name}'""").fetchall()
selected_columns = ", ".join([column[0] for column in intersecting_columns])

# Insert data into table by name, but only columns existing in contract and data
insert_data_sql = f"""INSERT INTO {model_name} BY NAME
(SELECT {selected_columns} FROM {read_function}('{model_path}', union_by_name=true, hive_partitioning=1));"""
con.sql(insert_data_sql)
if intersecting_columns:
selected_columns = ", ".join(f'"{column[0]}"' for column in intersecting_columns)
insert_data_sql = f"""INSERT INTO "{model_name}" BY NAME
(SELECT {selected_columns} FROM {read_function}('{model_path}', union_by_name=true, hive_partitioning=1));"""
con.sql(insert_data_sql)
else:
# Fallback
con.sql(
Expand Down
19 changes: 19 additions & 0 deletions tests/test_data_contract_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,22 @@ def test_check_property_is_present_no_snowflake_quoting():
checks = impl['checks for "my_table"']
schema_check = checks[0]["schema"]
assert schema_check["fail"]["when required column missing"] == ["name"]


def test_check_property_required_duckdb_hyphenated_model_name():
    """Required-check output double-quotes a hyphenated model name.

    With ``quote_model_name`` enabled (the setting intended for DuckDB-backed
    sources: s3/gcs/azure/local), the generated implementation must key its
    checks under ``checks for "test-1"`` and still contain the
    ``missing_count(name) = 0`` condition for the property.
    """
    required_check = check_property_required(
        "test-1", "name", QuotingConfig(quote_model_name=True)
    )
    parsed = yaml.safe_load(required_check.implementation)

    # Indexing with the quoted key raises KeyError if the name was not quoted.
    model_checks = parsed['checks for "test-1"']

    matching = [entry for entry in model_checks if "missing_count(name) = 0" in str(entry)]
    assert matching


def test_check_property_is_present_duckdb_hyphenated_model_name():
    """Presence-check output double-quotes a hyphenated model name.

    With ``quote_model_name`` enabled (the setting intended for DuckDB-backed
    sources: s3/gcs/azure/local), the generated implementation must key its
    checks under ``checks for "test-1"`` and its first entry must be a schema
    check that fails when the ``name`` column is missing.
    """
    present_check = check_property_is_present(
        "test-1", "name", QuotingConfig(quote_model_name=True)
    )
    parsed = yaml.safe_load(present_check.implementation)

    # Indexing with the quoted key raises KeyError if the name was not quoted.
    first_entry = parsed['checks for "test-1"'][0]
    required_columns = first_entry["schema"]["fail"]["when required column missing"]

    assert required_columns == ["name"]
Loading