diff --git a/CHANGELOG.md b/CHANGELOG.md index 94d931edd..b672f2262 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Escape single quotes in string values for SodaCL checks (#1090) - Fixed catalog export SpecView not having a tags property for the index.html template (#1059) +- Fix SQL importer type mappings: binary, datetime/time, and uuid types now map to the correct ODCS logicalType and format (#790) ### Added - Support additional PyArrow types in Parquet importer (#1091) +- Populate `logicalTypeOptions.format` for binary and uuid types in the SQL importer (#790) +- Snowflake DDL import with tags, descriptions, and template variable handling (#790) ## [0.11.6] - 2026-03-17 diff --git a/datacontract/imports/dbml_importer.py b/datacontract/imports/dbml_importer.py index 47e058237..384fd5ba7 100644 --- a/datacontract/imports/dbml_importer.py +++ b/datacontract/imports/dbml_importer.py @@ -97,7 +97,7 @@ def import_table_fields(table, references) -> List[SchemaProperty]: description = field.note.text if field.note else None is_primary_key = field.pk is_unique = field.unique - logical_type = map_type_from_sql(field.type) + logical_type, format = map_type_from_sql(field.type) ref = get_reference(field, references) @@ -109,6 +109,7 @@ def import_table_fields(table, references) -> List[SchemaProperty]: name=field_name, logical_type=logical_type if logical_type else "string", physical_type=field.type, + format=format, description=description, required=required if required else None, primary_key=is_primary_key if is_primary_key else None, diff --git a/datacontract/imports/odcs_helper.py b/datacontract/imports/odcs_helper.py index 4c5a27f35..dbb9caed5 100644 --- a/datacontract/imports/odcs_helper.py +++ b/datacontract/imports/odcs_helper.py @@ -34,6 +34,7 @@ def create_schema_object( description: str = None, business_name: str = None, properties: List[SchemaProperty] = None, + 
tags: List[str] = None, ) -> SchemaObject: """Create a SchemaObject (equivalent to DCS Model).""" schema = SchemaObject( @@ -48,6 +49,8 @@ def create_schema_object( schema.businessName = business_name if properties: schema.properties = properties + if tags: + schema.tags = tags return schema diff --git a/datacontract/imports/sql_importer.py b/datacontract/imports/sql_importer.py index 81673b299..c4f58b138 100644 --- a/datacontract/imports/sql_importer.py +++ b/datacontract/imports/sql_importer.py @@ -1,5 +1,6 @@ import logging import os +import re import sqlglot from open_data_contract_standard.model import OpenDataContractStandard @@ -18,17 +19,17 @@ class SqlImporter(Importer): def import_source(self, source: str, import_args: dict) -> OpenDataContractStandard: - return import_sql(self.import_format, source, import_args) + return import_sql(source, import_args) -def import_sql(format: str, source: str, import_args: dict = None) -> OpenDataContractStandard: +def import_sql(source: str, import_args: dict = None) -> OpenDataContractStandard: sql = read_file(source) dialect = to_dialect(import_args) try: parsed = sqlglot.parse_one(sql=sql, read=dialect) except Exception as e: - logging.error(f"Error parsing SQL: {str(e)}") + logging.error(f"Error sqlglot SQL: {str(e)}") raise DataContractException( type="import", name=f"Reading source from {source}", @@ -57,12 +58,13 @@ def import_sql(format: str, source: str, import_args: dict = None) -> OpenDataCo col_name = column.this.name col_type = to_col_type(column, dialect) - logical_type = map_type_from_sql(col_type) + logical_type, format = map_type_from_sql(col_type) col_description = get_description(column) max_length = get_max_length(column) precision, scale = get_precision_scale(column) is_primary_key = get_primary_key(column) is_required = column.find(sqlglot.exp.NotNullColumnConstraint) is not None or None + tags = get_tags(column) prop = create_property( name=col_name, @@ -72,9 +74,11 @@ def import_sql(format: str, 
source: str, import_args: dict = None) -> OpenDataCo max_length=max_length, precision=precision, scale=scale, + format=format, primary_key=is_primary_key, primary_key_position=primary_key_position if is_primary_key else None, required=is_required if is_required else None, + tags=tags, ) if is_primary_key: @@ -82,9 +86,24 @@ def import_sql(format: str, source: str, import_args: dict = None) -> OpenDataCo properties.append(prop) + table_comment_property = parsed.find(sqlglot.expressions.SchemaCommentProperty) + + table_description = None + if table_comment_property: + table_description = table_comment_property.this.this + + table_tags = None + table_props = parsed.find(sqlglot.expressions.Properties) + if table_props: + tags = table_props.find(sqlglot.expressions.Tags) + if tags: + table_tags = [str(t) for t in tags.expressions] + schema_obj = create_schema_object( name=table_name, physical_type="table", + description=table_description, + tags=table_tags, properties=properties, ) odcs.schema_.append(schema_obj) @@ -112,27 +131,9 @@ def to_dialect(import_args: dict) -> Dialects | None: return Dialects.TSQL if dialect.upper() in Dialects.__members__: return Dialects[dialect.upper()] - if dialect == "sqlserver": - return Dialects.TSQL return None -def to_physical_type_key(dialect: Dialects | str | None) -> str: - dialect_map = { - Dialects.TSQL: "sqlserverType", - Dialects.POSTGRES: "postgresType", - Dialects.BIGQUERY: "bigqueryType", - Dialects.SNOWFLAKE: "snowflakeType", - Dialects.REDSHIFT: "redshiftType", - Dialects.ORACLE: "oracleType", - Dialects.MYSQL: "mysqlType", - Dialects.DATABRICKS: "databricksType", - } - if isinstance(dialect, str): - dialect = Dialects[dialect.upper()] if dialect.upper() in Dialects.__members__ else None - return dialect_map.get(dialect, "physicalType") - - def to_server_type(source, dialect: Dialects | None) -> str | None: if dialect is None: return None @@ -166,10 +167,22 @@ def to_col_type_normalized(column): def 
get_description(column: sqlglot.expressions.ColumnDef) -> str | None: if column.comments is None: - return None + description = column.find(sqlglot.expressions.CommentColumnConstraint) + if description: + return description.this.this + else: + return None return " ".join(comment.strip() for comment in column.comments) +def get_tags(column: sqlglot.expressions.ColumnDef) -> list[str] | None: + tags = column.find(sqlglot.expressions.Tags) + if tags: + return [str(t) for t in tags.expressions] + else: + return None + + def get_max_length(column: sqlglot.expressions.ColumnDef) -> int | None: col_type = to_col_type_normalized(column) if col_type is None: @@ -212,79 +225,97 @@ def get_precision_scale(column): return None, None -def map_type_from_sql(sql_type: str) -> str | None: - """Map SQL type to ODCS logical type.""" +def map_type_from_sql(sql_type: str) -> tuple[str, str | None]: + """Map SQL type to ODCS logical type and optional format. + + Returns (logicalType, format). + The format corresponds to ODCS logicalTypeOptions.format (e.g. "binary", "uuid"). 
+ """ if sql_type is None: - return None + return ("string", None) sql_type_normed = sql_type.lower().strip() if sql_type_normed.startswith("varchar"): - return "string" + return ("string", None) elif sql_type_normed.startswith("char"): - return "string" + return ("string", None) elif sql_type_normed.startswith("string"): - return "string" + return ("string", None) elif sql_type_normed.startswith("nchar"): - return "string" + return ("string", None) elif sql_type_normed.startswith("text"): - return "string" + return ("string", None) elif sql_type_normed.startswith("nvarchar"): - return "string" + return ("string", None) elif sql_type_normed.startswith("ntext"): - return "string" - elif sql_type_normed.startswith("int") and not sql_type_normed.startswith("interval"): - return "integer" - elif sql_type_normed.startswith("bigint"): - return "integer" - elif sql_type_normed.startswith("tinyint"): - return "integer" - elif sql_type_normed.startswith("smallint"): - return "integer" + return ("string", None) + elif sql_type_normed.endswith("int") and not sql_type_normed.endswith("point"): + return ("integer", None) + elif sql_type_normed.endswith("integer"): + return ("integer", None) elif sql_type_normed.startswith("float"): - return "number" + return ("number", None) elif sql_type_normed.startswith("double"): - return "number" - elif sql_type_normed.startswith("decimal"): - return "number" + return ("number", None) + elif sql_type_normed == "real": + return ("number", None) + elif sql_type_normed.startswith("number"): + return ("number", None) elif sql_type_normed.startswith("numeric"): - return "number" + return ("number", None) + elif sql_type_normed.startswith("decimal"): + return ("number", None) + elif sql_type_normed.startswith("money"): + return ("number", None) elif sql_type_normed.startswith("bool"): - return "boolean" + return ("boolean", None) elif sql_type_normed.startswith("bit"): - return "boolean" + return ("boolean", None) elif 
sql_type_normed.startswith("binary"): - return "array" + return ("string", "binary") elif sql_type_normed.startswith("varbinary"): - return "array" + return ("string", "binary") elif sql_type_normed.startswith("raw"): - return "array" - elif sql_type_normed == "blob" or sql_type_normed == "bfile": - return "array" + return ("string", "binary") + elif sql_type_normed == "blob": + return ("string", "binary") + elif sql_type_normed == "bfile": + return ("string", "binary") + elif sql_type_normed.startswith("bytea"): + return ("string", "binary") + elif sql_type_normed == "image": + return ("string", "binary") elif sql_type_normed == "date": - return "date" + return ("date", None) elif sql_type_normed == "time": - return "string" + return ("time", None) elif sql_type_normed.startswith("timestamp"): - return "date" - elif sql_type_normed == "datetime" or sql_type_normed == "datetime2": - return "date" + return ("timestamp", None) elif sql_type_normed == "smalldatetime": - return "date" - elif sql_type_normed == "datetimeoffset": - return "date" + return ("timestamp", None) + elif sql_type_normed.startswith("datetime"): # tsql datetime2, datetimeoffset + return ("timestamp", None) elif sql_type_normed == "uniqueidentifier": # tsql - return "string" + return ("string", "uuid") elif sql_type_normed == "json": - return "object" + return ("object", None) elif sql_type_normed == "xml": # tsql - return "string" - elif sql_type_normed.startswith("number"): - return "number" + return ("string", None) elif sql_type_normed == "clob" or sql_type_normed == "nclob": - return "string" + return ("string", None) else: - return "object" + return ("object", None) + + +def remove_variable_tokens(sql_script: str) -> str: + """Replace templating placeholders with bare variable names so sqlglot can parse the SQL.""" + variable_pattern = re.compile( + r"\$\((\w+)\)" # $(var) — sqlcmd (T-SQL) + r"|\$\{(\w+)\}" # ${var} — Liquibase + r"|\{\{(\w+)\}\}" # {{var}} — Jinja / dbt + ) + return 
variable_pattern.sub(lambda m: m.group(1) or m.group(2) or m.group(3), sql_script) def read_file(path): @@ -298,4 +329,5 @@ def read_file(path): ) with open(path, "r") as file: file_content = file.read() - return file_content + + return remove_variable_tokens(file_content) diff --git a/datacontract/imports/unity_importer.py b/datacontract/imports/unity_importer.py index bd0fe27b3..6fa7a7716 100644 --- a/datacontract/imports/unity_importer.py +++ b/datacontract/imports/unity_importer.py @@ -152,7 +152,7 @@ def import_table_fields(columns: List[ColumnInfo]) -> List[SchemaProperty]: def _to_property(column: ColumnInfo) -> SchemaProperty: """Convert a Unity ColumnInfo to an ODCS SchemaProperty.""" sql_type = str(column.type_text) if column.type_text else "string" - logical_type = map_type_from_sql(sql_type) + logical_type, format = map_type_from_sql(sql_type) required = column.nullable is None or not column.nullable return create_property( @@ -160,6 +160,7 @@ def _to_property(column: ColumnInfo) -> SchemaProperty: logical_type=logical_type if logical_type else "string", physical_type=sql_type, description=column.comment, + format=format, required=required if required else None, custom_properties={"databricksType": sql_type} if sql_type else None, ) diff --git a/tests/fixtures/databricks-unity/import/datacontract.yaml b/tests/fixtures/databricks-unity/import/datacontract.yaml index 3dff216cd..ae894b326 100644 --- a/tests/fixtures/databricks-unity/import/datacontract.yaml +++ b/tests/fixtures/databricks-unity/import/datacontract.yaml @@ -53,7 +53,7 @@ schema: customProperties: - property: databricksType value: timestamp - logicalType: date + logicalType: timestamp - name: is_active physicalType: boolean customProperties: diff --git a/tests/fixtures/dbml/import/datacontract.yaml b/tests/fixtures/dbml/import/datacontract.yaml index 2aa614e83..d14583e85 100644 --- a/tests/fixtures/dbml/import/datacontract.yaml +++ b/tests/fixtures/dbml/import/datacontract.yaml @@ -26,7 
+26,7 @@ schema: physicalType: timestamp description: The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful. - logicalType: date + logicalType: timestamp required: true - name: order_total physicalType: record @@ -46,7 +46,7 @@ schema: - name: processed_timestamp physicalType: timestamp description: The timestamp when the record was processed by the data platform. - logicalType: date + logicalType: timestamp required: true - name: line_items physicalType: table diff --git a/tests/fixtures/dbml/import/datacontract_table_filtered.yaml b/tests/fixtures/dbml/import/datacontract_table_filtered.yaml index b8d5bb2d9..2247ebc1b 100644 --- a/tests/fixtures/dbml/import/datacontract_table_filtered.yaml +++ b/tests/fixtures/dbml/import/datacontract_table_filtered.yaml @@ -26,7 +26,7 @@ schema: physicalType: timestamp description: The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful. - logicalType: date + logicalType: timestamp required: true - name: order_total physicalType: record @@ -46,5 +46,5 @@ schema: - name: processed_timestamp physicalType: timestamp description: The timestamp when the record was processed by the data platform. 
- logicalType: date + logicalType: timestamp required: true diff --git a/tests/fixtures/snowflake/import/ddl.sql b/tests/fixtures/snowflake/import/ddl.sql new file mode 100644 index 000000000..c458db7e5 --- /dev/null +++ b/tests/fixtures/snowflake/import/ddl.sql @@ -0,0 +1,42 @@ +CREATE TABLE IF NOT EXISTS ${database_name}.PUBLIC.my_table ( + -- https://docs.snowflake.com/en/sql-reference/intro-summary-data-types + field_primary_key NUMBER(38,0) NOT NULL autoincrement start 1 increment 1 COMMENT 'Primary key', + field_not_null INT NOT NULL COMMENT 'Not null', + field_char CHAR(10) COMMENT 'Fixed-length string', + field_character CHARACTER(10) COMMENT 'Fixed-length string', + field_varchar VARCHAR(100) WITH TAG (SNOWFLAKE.CORE.PRIVACY_CATEGORY='IDENTIFIER', SNOWFLAKE.CORE.SEMANTIC_CATEGORY='NAME') COMMENT 'Variable-length string', + + field_text TEXT COMMENT 'Large variable-length string', + field_string STRING COMMENT 'Large variable-length Unicode string', + + field_tinyint TINYINT COMMENT 'Integer (0-255)', + field_smallint SMALLINT COMMENT 'Integer (-32,768 to 32,767)', + field_int INT COMMENT 'Integer (-2.1B to 2.1B)', + field_integer INTEGER COMMENT 'Integer full name(-2.1B to 2.1B)', + field_bigint BIGINT COMMENT 'Large integer (-9 quintillion to 9 quintillion)', + + field_decimal DECIMAL(10, 2) COMMENT 'Fixed precision decimal', + field_numeric NUMERIC(10, 2) COMMENT 'Same as DECIMAL', + + field_float FLOAT COMMENT 'Approximate floating-point', + field_float4 FLOAT4 COMMENT 'Approximate floating-point 4', + field_float8 FLOAT8 COMMENT 'Approximate floating-point 8', + field_real REAL COMMENT 'Smaller floating-point', + + field_boulean BOOLEAN COMMENT 'Boolean-like (0 or 1)', + + field_date DATE COMMENT 'Date only (YYYY-MM-DD)', + field_time TIME COMMENT 'Time only (HH:MM:SS)', + field_timestamp TIMESTAMP COMMENT 'More precise datetime', + field_timestamp_ltz TIMESTAMP_LTZ COMMENT 'More precise datetime with local time zone; time zone, if provided, isn`t 
stored.', + field_timestamp_ntz TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP() COMMENT 'More precise datetime with no time zone; time zone, if provided, isn`t stored.', + field_timestamp_tz TIMESTAMP_TZ COMMENT 'More precise datetime with time zone.', + + field_binary BINARY(16) COMMENT 'Fixed-length binary', + field_varbinary VARBINARY(100) COMMENT 'Variable-length binary', + + field_variant VARIANT COMMENT 'VARIANT data', + field_json OBJECT COMMENT 'JSON (Stored as text)', + UNIQUE(field_not_null), + PRIMARY KEY (field_primary_key) +) COMMENT = 'My Comment' diff --git a/tests/test_catalog.py b/tests/test_catalog.py index f1d61b0f4..a8192f674 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -19,12 +19,14 @@ def test_cli(tmp_path: PosixPath): def test_with_tags(tmp_path: PosixPath): runner = CliRunner() - result = runner.invoke(app, ["catalog", "--files", "fixtures/catalog/datacontract-3.tags.yaml", "--output", tmp_path]) + result = runner.invoke( + app, ["catalog", "--files", "fixtures/catalog/datacontract-3.tags.yaml", "--output", tmp_path] + ) assert result.exit_code == 0 assert os.path.exists(tmp_path / "index.html") with open(tmp_path / "index.html") as index: content = index.read() # naive assertion which only checks that it appears anywhere in the document - assert 'tag expected-tag' in content + assert "tag expected-tag" in content assert 'data-tag="expected-tag"' in content diff --git a/tests/test_export_sql_query.py b/tests/test_export_sql_query.py index e9b502081..0fde5a948 100644 --- a/tests/test_export_sql_query.py +++ b/tests/test_export_sql_query.py @@ -46,9 +46,9 @@ def test_to_sql_query_snowflake(): def test_to_sql_query_physical_name(): - actual = DataContract( - data_contract_file="fixtures/postgres-export-physical-name/datacontract.yaml" - ).export("sql-query") + actual = DataContract(data_contract_file="fixtures/postgres-export-physical-name/datacontract.yaml").export( + "sql-query" + ) expected = """ -- Data Contract: 
postgres-physical-name -- SQL Dialect: postgres diff --git a/tests/test_import_sql_oracle.py b/tests/test_import_sql_oracle.py index 37ffc5604..e9b13ac06 100644 --- a/tests/test_import_sql_oracle.py +++ b/tests/test_import_sql_oracle.py @@ -83,15 +83,15 @@ def test_import_sql_oracle(): physicalType: DOUBLE PRECISION description: 64-bit floating point number - name: field_timestamp - logicalType: date + logicalType: timestamp physicalType: TIMESTAMP description: Timestamp with fractional second precision of 6, no timezones - name: field_timestamp_tz - logicalType: date + logicalType: timestamp physicalType: TIMESTAMP WITH TIME ZONE description: Timestamp with fractional second precision of 6, with timezones (TZ) - name: field_timestamp_ltz - logicalType: date + logicalType: timestamp physicalType: TIMESTAMPLTZ description: Timestamp with fractional second precision of 6, with local timezone (LTZ) - name: field_interval_year @@ -103,7 +103,9 @@ def test_import_sql_oracle(): physicalType: INTERVAL DAY TO SECOND description: Interval of time in days, hours, minutes and seconds with default (2 / 6) precision - name: field_raw - logicalType: array + logicalType: string + logicalTypeOptions: + format: binary physicalType: RAW description: Large raw binary data - name: field_rowid @@ -135,11 +137,15 @@ def test_import_sql_oracle(): physicalType: NCLOB description: National character large object - name: field_blob - logicalType: array + logicalType: string + logicalTypeOptions: + format: binary physicalType: BLOB description: Binary large object - name: field_bfile - logicalType: array + logicalType: string + logicalTypeOptions: + format: binary physicalType: BFILE """ print("Result", result.to_yaml()) @@ -176,7 +182,7 @@ def test_import_sql_constraints(): physicalType: VARCHAR(30) required: true - name: create_date - logicalType: date + logicalType: timestamp physicalType: TIMESTAMP required: true - name: changed_by @@ -185,7 +191,7 @@ def test_import_sql_constraints(): 
maxLength: 30 physicalType: VARCHAR(30) - name: change_date - logicalType: date + logicalType: timestamp physicalType: TIMESTAMP - name: name logicalType: string diff --git a/tests/test_import_sql_postgres.py b/tests/test_import_sql_postgres.py index 908a9810a..84dc1848c 100644 --- a/tests/test_import_sql_postgres.py +++ b/tests/test_import_sql_postgres.py @@ -14,13 +14,7 @@ def test_cli(): runner = CliRunner() result = runner.invoke( app, - [ - "import", - "--format", - "sql", - "--source", - sql_file_path, - ], + ["import", "--format", "sql", "--source", sql_file_path, "--dialect", "postgres"], ) assert result.exit_code == 0 @@ -56,7 +50,7 @@ def test_import_sql_postgres(): physicalType: INT required: true - name: field_three - logicalType: date + logicalType: timestamp physicalType: TIMESTAMPTZ """ print("Result", result.to_yaml()) @@ -93,7 +87,7 @@ def test_import_sql_constraints(): physicalType: VARCHAR(30) required: true - name: create_date - logicalType: date + logicalType: timestamp physicalType: TIMESTAMP required: true - name: changed_by @@ -102,7 +96,7 @@ def test_import_sql_constraints(): maxLength: 30 physicalType: VARCHAR(30) - name: change_date - logicalType: date + logicalType: timestamp physicalType: TIMESTAMP - name: name logicalType: string diff --git a/tests/test_import_sql_snowflake.py b/tests/test_import_sql_snowflake.py new file mode 100644 index 000000000..2ad363f4a --- /dev/null +++ b/tests/test_import_sql_snowflake.py @@ -0,0 +1,171 @@ +import yaml + +from datacontract.data_contract import DataContract + +sql_file_path = "fixtures/snowflake/import/ddl.sql" + + +def test_import_sql_snowflake(): + result = DataContract().import_from_source("sql", sql_file_path, dialect="snowflake") + + expected = """version: 1.0.0 +kind: DataContract +apiVersion: v3.1.0 +id: my-data-contract +name: My Data Contract +status: draft +servers: +- server: snowflake + type: snowflake +schema: +- name: my_table + physicalType: table + description: My Comment + 
logicalType: object + physicalName: my_table + properties: + - name: field_primary_key + physicalType: DECIMAL(38, 0) + description: Primary key + logicalType: number + logicalTypeOptions: + precision: 38 + scale: 0 + required: true + - name: field_not_null + physicalType: INT + description: Not null + logicalType: integer + required: true + - name: field_char + physicalType: CHAR(10) + description: Fixed-length string + logicalType: string + logicalTypeOptions: + maxLength: 10 + - name: field_character + physicalType: CHAR(10) + description: Fixed-length string + logicalType: string + logicalTypeOptions: + maxLength: 10 + - name: field_varchar + physicalType: VARCHAR(100) + description: Variable-length string + tags: + - SNOWFLAKE.CORE.PRIVACY_CATEGORY='IDENTIFIER' + - SNOWFLAKE.CORE.SEMANTIC_CATEGORY='NAME' + logicalType: string + logicalTypeOptions: + maxLength: 100 + - name: field_text + physicalType: VARCHAR + description: Large variable-length string + logicalType: string + - name: field_string + physicalType: VARCHAR + description: Large variable-length Unicode string + logicalType: string + - name: field_tinyint + physicalType: TINYINT + description: Integer (0-255) + logicalType: integer + - name: field_smallint + physicalType: SMALLINT + description: Integer (-32,768 to 32,767) + logicalType: integer + - name: field_int + physicalType: INT + description: Integer (-2.1B to 2.1B) + logicalType: integer + - name: field_integer + physicalType: INT + description: Integer full name(-2.1B to 2.1B) + logicalType: integer + - name: field_bigint + physicalType: BIGINT + description: Large integer (-9 quintillion to 9 quintillion) + logicalType: integer + - name: field_decimal + physicalType: DECIMAL(10, 2) + description: Fixed precision decimal + logicalType: number + logicalTypeOptions: + precision: 10 + scale: 2 + - name: field_numeric + physicalType: DECIMAL(10, 2) + description: Same as DECIMAL + logicalType: number + logicalTypeOptions: + precision: 10 + 
scale: 2 + - name: field_float + physicalType: DOUBLE + description: Approximate floating-point + logicalType: number + - name: field_float4 + physicalType: FLOAT + description: Approximate floating-point 4 + logicalType: number + - name: field_float8 + physicalType: DOUBLE + description: Approximate floating-point 8 + logicalType: number + - name: field_real + physicalType: FLOAT + description: Smaller floating-point + logicalType: number + - name: field_boulean + physicalType: BOOLEAN + description: Boolean-like (0 or 1) + logicalType: boolean + - name: field_date + physicalType: DATE + description: Date only (YYYY-MM-DD) + logicalType: date + - name: field_time + physicalType: TIME + description: Time only (HH:MM:SS) + logicalType: time + - name: field_timestamp + physicalType: TIMESTAMP + description: More precise datetime + logicalType: timestamp + - name: field_timestamp_ltz + physicalType: TIMESTAMPLTZ + description: More precise datetime with local time zone; time zone, if provided, + isn`t stored. + logicalType: timestamp + - name: field_timestamp_ntz + physicalType: TIMESTAMPNTZ + description: More precise datetime with no time zone; time zone, if provided, + isn`t stored. + logicalType: timestamp + - name: field_timestamp_tz + description: More precise datetime with time zone. 
+ logicalType: timestamp + physicalType: 'TIMESTAMPTZ' + - name: field_binary + physicalType: BINARY(16) + description: Fixed-length binary + logicalType: string + logicalTypeOptions: + format: binary + - name: field_varbinary + physicalType: VARBINARY(100) + description: Variable-length binary + logicalType: string + logicalTypeOptions: + format: binary + - name: field_variant + physicalType: VARIANT + description: VARIANT data + logicalType: object + - name: field_json + physicalType: OBJECT + description: JSON (Stored as text) + logicalType: object""" + + print("Result", result.to_yaml()) + assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) diff --git a/tests/test_import_sql_sqlserver.py b/tests/test_import_sql_sqlserver.py index 479421856..b62d7d61b 100644 --- a/tests/test_import_sql_sqlserver.py +++ b/tests/test_import_sql_sqlserver.py @@ -114,31 +114,37 @@ def test_import_sql_sqlserver(): physicalType: DATE description: Date only (YYYY-MM-DD) - name: field_time - logicalType: string + logicalType: time physicalType: TIME description: Time only (HH:MM:SS) - name: field_datetime2 - logicalType: date + logicalType: timestamp physicalType: DATETIME2 description: More precise datetime - name: field_smalldatetime - logicalType: date + logicalType: timestamp physicalType: SMALLDATETIME description: Less precise datetime - name: field_datetimeoffset - logicalType: date + logicalType: timestamp physicalType: DATETIMEOFFSET description: Datetime with time zone - name: field_binary - logicalType: array + logicalType: string + logicalTypeOptions: + format: binary physicalType: BINARY(16) description: Fixed-length binary - name: field_varbinary - logicalType: array + logicalType: string + logicalTypeOptions: + format: binary physicalType: VARBINARY(100) description: Variable-length binary - name: field_uniqueidentifier logicalType: string + logicalTypeOptions: + format: uuid physicalType: UNIQUEIDENTIFIER description: GUID - name: field_xml