From 90640f0e4d0d072d9c2e3191c343443976b94349 Mon Sep 17 00:00:00 2001 From: Evan Miller Date: Fri, 23 May 2025 09:18:47 -0400 Subject: [PATCH 1/2] XPTv8: support for informats --- src/readstat.h | 3 +++ src/readstat_variable.c | 7 ++++++ src/readstat_writer.c | 8 ++++++ src/sas/readstat_xport_read.c | 13 +++++++++- src/sas/readstat_xport_write.c | 45 ++++++++++++++++++++++++++++------ src/test/test_list.h | 8 ++++++ src/test/test_read.c | 5 ++++ src/test/test_types.h | 1 + src/test/test_write.c | 2 ++ 9 files changed, 83 insertions(+), 9 deletions(-) diff --git a/src/readstat.h b/src/readstat.h index c6ece530..1b335812 100644 --- a/src/readstat.h +++ b/src/readstat.h @@ -205,6 +205,7 @@ typedef struct readstat_variable_s { int index; char name[300]; char format[256]; + char informat[256]; char label[1024]; readstat_label_set_t *label_set; off_t offset; @@ -287,6 +288,7 @@ int readstat_variable_get_index_after_skipping(const readstat_variable_t *variab const char *readstat_variable_get_name(const readstat_variable_t *variable); const char *readstat_variable_get_label(const readstat_variable_t *variable); const char *readstat_variable_get_format(const readstat_variable_t *variable); +const char *readstat_variable_get_informat(const readstat_variable_t *variable); readstat_type_t readstat_variable_get_type(const readstat_variable_t *variable); readstat_type_class_t readstat_variable_get_type_class(const readstat_variable_t *variable); size_t readstat_variable_get_storage_width(const readstat_variable_t *variable); @@ -538,6 +540,7 @@ readstat_variable_t *readstat_add_variable(readstat_writer_t *writer, const char size_t storage_width); void readstat_variable_set_label(readstat_variable_t *variable, const char *label); void readstat_variable_set_format(readstat_variable_t *variable, const char *format); +void readstat_variable_set_informat(readstat_variable_t *variable, const char *informat); void readstat_variable_set_label_set(readstat_variable_t *variable, readstat_label_set_t *label_set); void readstat_variable_set_measure(readstat_variable_t *variable, readstat_measure_t measure); void readstat_variable_set_alignment(readstat_variable_t *variable, readstat_alignment_t alignment); diff --git a/src/readstat_variable.c b/src/readstat_variable.c index ecd71ab5..37d5164f 100644 --- a/src/readstat_variable.c +++ b/src/readstat_variable.c @@ -41,6 +41,13 @@ const char *readstat_variable_get_format(const readstat_variable_t *variable) { return NULL; } +const char *readstat_variable_get_informat(const readstat_variable_t *variable) { + if (variable->informat[0]) + return variable->informat; + + return NULL; +} + readstat_type_t readstat_variable_get_type(const readstat_variable_t *variable) { return variable->type; } diff --git a/src/readstat_writer.c b/src/readstat_writer.c index c1e98907..cc376327 100644 --- a/src/readstat_writer.c +++ b/src/readstat_writer.c @@ -436,6 +436,14 @@ void readstat_variable_set_format(readstat_variable_t *variable, const char *for } } +void readstat_variable_set_informat(readstat_variable_t *variable, const char *informat) { + if (informat) { + snprintf(variable->informat, sizeof(variable->informat), "%s", informat); + } else { + memset(variable->informat, '\0', sizeof(variable->informat)); + } +} + void readstat_variable_set_measure(readstat_variable_t *variable, readstat_measure_t measure) { variable->measure = measure; } diff --git a/src/sas/readstat_xport_read.c b/src/sas/readstat_xport_read.c index 6bd9ddbf..7894f162 100644 --- a/src/sas/readstat_xport_read.c +++ b/src/sas/readstat_xport_read.c @@ -431,6 +431,11 @@ static readstat_error_t xport_read_labels_v9(xport_ctx_t *ctx, int label_count) format, format_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; + + retval = readstat_convert(variable->informat, sizeof(variable->informat), + informat, informat_len, ctx->converter); + if (retval != READSTAT_OK) + goto cleanup; } retval = xport_skip_rest_of_record(ctx); @@ -487,7 +492,13 @@ static readstat_error_t xport_read_variables(xport_ctx_t *ctx) { retval = xport_construct_format(variable->format, sizeof(variable->format), namestr.nform, sizeof(namestr.nform), - variable->display_width, variable->decimals); + namestr.nfl, namestr.nfd); + if (retval != READSTAT_OK) + goto cleanup; + + retval = xport_construct_format(variable->informat, sizeof(variable->informat), + namestr.niform, sizeof(namestr.niform), + namestr.nifl, namestr.nifd); if (retval != READSTAT_OK) goto cleanup; diff --git a/src/sas/readstat_xport_write.c b/src/sas/readstat_xport_write.c index 1f7b317b..9db70f3f 100644 --- a/src/sas/readstat_xport_write.c +++ b/src/sas/readstat_xport_write.c @@ -108,7 +108,7 @@ static readstat_error_t xport_write_variables(readstat_writer_t *writer) { copypad(namestr.nlabel, sizeof(namestr.nlabel), variable->label); if (variable->format[0]) { - xport_format_t format; + xport_format_t format; retval = xport_parse_format(variable->format, strlen(variable->format), &format, NULL, NULL); @@ -119,10 +119,6 @@ static readstat_error_t xport_write_variables(readstat_writer_t *writer) { namestr.nfl = format.width; namestr.nfd = format.decimals; - copypad(namestr.niform, sizeof(namestr.niform), format.name); - namestr.nifl = format.width; - namestr.nifd = format.decimals; - if (strlen(format.name) > 8) { any_has_long_format = 1; needs_long_record = 1; @@ -131,6 +127,25 @@ static readstat_error_t xport_write_variables(readstat_writer_t *writer) { namestr.nfl = variable->display_width; } + if (variable->informat[0]) { + xport_format_t informat; + + retval = xport_parse_format(variable->informat, strlen(variable->informat), + &informat, NULL, NULL); + + if (retval != READSTAT_OK) + goto cleanup; + + copypad(namestr.niform, sizeof(namestr.niform), informat.name); + namestr.nifl = informat.width; + namestr.nifd = informat.decimals; + + if (strlen(informat.name) > 8) { + any_has_long_format = 1; + needs_long_record = 1; + } + } + namestr.nfj = (variable->alignment == READSTAT_ALIGNMENT_RIGHT); if (writer->version == 8) { @@ -176,13 +191,14 @@ static readstat_error_t xport_write_variables(readstat_writer_t *writer) { size_t label_len = strlen(variable->label); size_t name_len = strlen(variable->name); size_t format_len = strlen(variable->format); + size_t informat_len = strlen(variable->informat); int has_long_label = 0; int has_long_format = 0; has_long_label = (label_len > 40); if (variable->format[0]) { - xport_format_t format; + xport_format_t format; retval = xport_parse_format(variable->format, strlen(variable->format), &format, NULL, NULL); @@ -194,8 +210,21 @@ static readstat_error_t xport_write_variables(readstat_writer_t *writer) { } } + if (variable->informat[0]) { + xport_format_t informat; + + retval = xport_parse_format(variable->informat, strlen(variable->informat), + &informat, NULL, NULL); + if (retval != READSTAT_OK) + goto cleanup; + + if (strlen(informat.name) > 8) { + has_long_format = 1; + } + } + if (has_long_format) { - uint16_t labeldef[5] = { i+1, name_len, label_len, format_len, format_len }; + uint16_t labeldef[5] = { i+1, name_len, label_len, format_len, informat_len }; if (machine_is_little_endian()) { labeldef[0] = byteswap2(labeldef[0]); @@ -221,7 +250,7 @@ static readstat_error_t xport_write_variables(readstat_writer_t *writer) { if (retval != READSTAT_OK) goto cleanup; - retval = readstat_write_string(writer, variable->format); + retval = readstat_write_string(writer, variable->informat); if (retval != READSTAT_OK) goto cleanup; diff --git a/src/test/test_list.h b/src/test/test_list.h index dea6b9dc..856595c3 100644 --- a/src/test/test_list.h +++ b/src/test/test_list.h @@ -921,6 +921,14 @@ static rt_test_group_t _test_groups[] = { { .name = "VAR2", .type = READSTAT_TYPE_STRING, .format = "$CHAR3", .label_set = "$CHAR3" } } }, + { + .label = "SAS informats", + .test_formats = RT_FORMAT_XPORT_8, + .columns = { + { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .informat = "10.3" }, + { .name = "VAR2", .type = READSTAT_TYPE_STRING, .informat = "$CHAR3" } + } + }, { .label = "SAS long format", .test_formats = RT_FORMAT_SAS7BDAT | RT_FORMAT_XPORT_8, diff --git a/src/test/test_read.c b/src/test/test_read.c index 9fc432a4..1bd93895 100644 --- a/src/test/test_read.c +++ b/src/test/test_read.c @@ -200,6 +200,11 @@ static int handle_variable(int index, readstat_variable_t *variable, readstat_variable_get_format(variable), "Column formats"); + if (column->informat[0]) + push_error_if_strings_differ(rt_ctx, column->informat, + readstat_variable_get_informat(variable), + "Column informats"); + if (column->display_width) push_error_if_doubles_differ(rt_ctx, column->display_width, readstat_variable_get_display_width(variable), diff --git a/src/test/test_types.h b/src/test/test_types.h index c0d762d4..f9f303e2 100644 --- a/src/test/test_types.h +++ b/src/test/test_types.h @@ -27,6 +27,7 @@ typedef struct rt_column_s { char name[RT_MAX_STRING]; char label[RT_MAX_STRING]; char format[RT_MAX_STRING]; + char informat[RT_MAX_STRING]; int display_width; readstat_alignment_t alignment; readstat_measure_t measure; diff --git a/src/test/test_write.c b/src/test/test_write.c index 8d68997a..9bc7545f 100644 --- a/src/test/test_write.c +++ b/src/test/test_write.c @@ -115,6 +115,8 @@ readstat_error_t write_file_to_buffer(rt_test_file_t *file, rt_buffer_t *buffer, readstat_variable_set_label_set(variable, label_set); if (column->format[0]) readstat_variable_set_format(variable, column->format); + if (column->informat[0]) + readstat_variable_set_informat(variable, column->informat); if (column->display_width) readstat_variable_set_display_width(variable, column->display_width); From 8818d206fc183247104662b25bf7fa19f0b87ca6 Mon Sep 17 00:00:00 2001 From: Evan Miller Date: Fri, 23 May 2025 09:44:02 -0400 Subject: [PATCH 2/2] SAS7BDAT: set default informat --- src/sas/readstat_sas7bdat_read.c | 3 +++ src/sas/readstat_xport_write.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index 20c76a8b..dbcb47b4 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -704,6 +704,9 @@ static readstat_variable_t *sas7bdat_init_variable(sas7bdat_ctx_t *ctx, int i, len += snprintf(variable->format + len, sizeof(variable->format) - len, ".%d", ctx->col_info[i].format_digits); } + if (len) { // TODO where is the informat saved? + readstat_variable_set_informat(variable, variable->format); + } if ((retval = sas7bdat_copy_text_ref(variable->label, sizeof(variable->label), ctx->col_info[i].label_ref, ctx)) != READSTAT_OK) { goto cleanup; diff --git a/src/sas/readstat_xport_write.c b/src/sas/readstat_xport_write.c index 9db70f3f..8c223ffe 100644 --- a/src/sas/readstat_xport_write.c +++ b/src/sas/readstat_xport_write.c @@ -123,8 +123,9 @@ static readstat_error_t xport_write_variables(readstat_writer_t *writer) { any_has_long_format = 1; needs_long_record = 1; } - } else if (variable->display_width) { + } else { namestr.nfl = variable->display_width; + namestr.nfd = variable->decimals; } if (variable->informat[0]) {