Skip to content

Commit 4e20658

Browse files
committed
Convert DTA notes to UTF-8
1 parent 140f047 commit 4e20658

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

src/stata/readstat_dta_read.c

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,7 @@ static readstat_error_t dta_read_expansion_fields(dta_ctx_t *ctx) {
184184
readstat_error_t retval = READSTAT_OK;
185185
readstat_io_t *io = ctx->io;
186186
char *buffer = NULL;
187+
char *utf8_note = NULL;
187188

188189
if (ctx->expansion_len_len == 0)
189190
return READSTAT_OK;
@@ -271,7 +272,21 @@ static readstat_error_t dta_read_expansion_fields(dta_ctx_t *ctx) {
271272
int index = 0;
272273
if (strncmp(&buffer[0], "_dta", 4) == 0 &&
273274
sscanf(&buffer[ctx->ch_metadata_len], "note%d", &index) == 1) {
274-
if (ctx->handle.note(index, &buffer[2*ctx->ch_metadata_len], ctx->user_ctx) != READSTAT_HANDLER_OK) {
275+
const char *note_src = &buffer[2*ctx->ch_metadata_len];
276+
size_t note_src_len = len - 2*ctx->ch_metadata_len;
277+
size_t utf8_note_len = 4*note_src_len + 1;
278+
279+
if ((utf8_note = readstat_realloc(utf8_note, utf8_note_len)) == NULL) {
280+
retval = READSTAT_ERROR_MALLOC;
281+
goto cleanup;
282+
}
283+
284+
retval = readstat_convert(utf8_note, utf8_note_len, note_src,
285+
strnlen(note_src, note_src_len), ctx->converter);
286+
if (retval != READSTAT_OK)
287+
goto cleanup;
288+
289+
if (ctx->handle.note(index, utf8_note, ctx->user_ctx) != READSTAT_HANDLER_OK) {
275290
retval = READSTAT_ERROR_USER_ABORT;
276291
goto cleanup;
277292
}
@@ -291,6 +306,8 @@ static readstat_error_t dta_read_expansion_fields(dta_ctx_t *ctx) {
291306
cleanup:
292307
if (buffer)
293308
free(buffer);
309+
if (utf8_note)
310+
free(utf8_note);
294311

295312
return retval;
296313
}

src/test/test_list.h

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -795,6 +795,21 @@ static rt_test_group_t _test_groups[] = {
795795
.label_set = "somelbl"
796796
}
797797
}
798+
},
799+
{
800+
.label = "UTF-8 note",
801+
.test_formats = RT_FORMAT_DTA_118_AND_NEWER,
802+
.rows = 0,
803+
.notes_count = 1,
804+
.notes = {
805+
"Stra" "\xc3\x9f" "e"
806+
},
807+
.columns = {
808+
{
809+
.name = "var1",
810+
.type = READSTAT_TYPE_DOUBLE
811+
}
812+
}
798813
}
799814
}
800815
},

0 commit comments

Comments
 (0)