diff --git a/src/readstat_error.c b/src/readstat_error.c index d5d84ab..5d0739d 100644 --- a/src/readstat_error.c +++ b/src/readstat_error.c @@ -122,5 +122,8 @@ const char *readstat_error_message(readstat_error_t error_code) { if (error_code == READSTAT_ERROR_BAD_TIMESTAMP_VALUE) return "The provided file timestamp is invalid"; + if (error_code == READSTAT_ERROR_BAD_MR_STRING) + return "A multi-response set record is invalid"; + return "Unknown error"; } diff --git a/src/spss/readstat_sav_parse_mr_name.c b/src/spss/readstat_sav_parse_mr_name.c index 4bfad0c..8bcb33f 100644 --- a/src/spss/readstat_sav_parse_mr_name.c +++ b/src/spss/readstat_sav_parse_mr_name.c @@ -1,66 +1,64 @@ -#line 1 "src/spss/readstat_sav_parse_mr_name.rl" + +#line 1 "./readstat_sav_parse_mr_name.rl" #include #include #include #include "../readstat.h" #include "../readstat_malloc.h" +#include "../readstat_iconv.h" +#include "../readstat_convert.h" +#include "readstat_sav.h" -#line 8 "src/spss/readstat_sav_parse_mr_name.c" -static const signed char _mr_extractor_actions[] = { - 0, 1, 0, 1, 1, 1, 2, 1, - 3, 1, 4, 0 +#line 11 "./readstat_sav_parse_mr_name.c" +static const char _mr_extractor_actions[] = { + 0, 1, 0, 1, 1, 1, 2, 1, + 3, 1, 4 }; -static const signed char _mr_extractor_key_offsets[] = { - 0, 0, 8, 17, 19, 22, 24, 27, - 36, 48, 0 +static const char _mr_extractor_key_offsets[] = { + 0, 0, 2, 4, 6, 9, 11, 14, + 16, 21, 26 }; static const char _mr_extractor_trans_keys[] = { - 46, 95, 48, 57, 65, 90, 97, 122, - 46, 61, 95, 48, 57, 65, 90, 97, - 122, 67, 68, 32, 48, 57, 48, 57, - 32, 48, 57, 32, 46, 95, 48, 57, - 65, 90, 97, 122, 0, 32, 46, 95, - 9, 13, 48, 57, 65, 90, 97, 122, - 46, 95, 48, 57, 65, 90, 97, 122, - 0 + 32, 61, 32, 61, 67, 68, 32, 48, + 57, 48, 57, 32, 48, 57, 32, 61, + 0, 32, 61, 9, 13, 0, 32, 61, + 9, 13, 32, 61, 0 +}; + +static const char _mr_extractor_single_lengths[] = { + 0, 2, 2, 0, 1, 0, 1, 2, + 3, 3, 2 }; -static const signed char _mr_extractor_single_lengths[] = { - 0, 2, 3, 0, 1, 0, 1, 3, - 4, 2, 0 +static const char _mr_extractor_range_lengths[] = { + 0, 0, 0, 1, 1, 1, 1, 0, + 1, 1, 0 }; -static const signed char _mr_extractor_range_lengths[] = { - 0, 3, 3, 1, 1, 1, 1, 3, - 4, 3, 0 +static const char _mr_extractor_index_offsets[] = { + 0, 0, 3, 6, 8, 11, 13, 16, + 19, 24, 29 }; -static const signed char _mr_extractor_index_offsets[] = { - 0, 0, 6, 13, 15, 18, 20, 23, - 30, 39, 0 +static const char _mr_extractor_indicies[] = { + 1, 1, 0, 1, 2, 0, 3, 1, + 4, 5, 1, 6, 1, 7, 6, 1, + 9, 1, 8, 10, 11, 1, 10, 8, + 10, 11, 1, 10, 8, 1, 1, 8, + 0 }; -static const signed char _mr_extractor_cond_targs[] = { - 2, 2, 2, 2, 2, 0, 2, 3, - 2, 2, 2, 2, 0, 4, 0, 5, - 4, 0, 6, 0, 7, 6, 0, 7, - 8, 8, 8, 8, 8, 0, 9, 9, - 8, 8, 9, 8, 8, 8, 0, 8, - 8, 8, 8, 8, 0, 0, 1, 2, - 3, 4, 5, 6, 7, 8, 9, 0 +static const char _mr_extractor_trans_targs[] = { + 2, 0, 3, 4, 5, 4, 6, 7, + 8, 7, 9, 10 }; -static const signed char _mr_extractor_cond_actions[] = { - 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 3, 0, 5, - 0, 0, 0, 0, 7, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 9, 9, - 0, 0, 9, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 +static const char _mr_extractor_trans_actions[] = { + 0, 0, 1, 3, 5, 0, 0, 7, + 0, 0, 9, 9 }; static const int mr_extractor_start = 1; @@ -68,321 +66,306 @@ static const int mr_extractor_start = 1; static const int mr_extractor_en_main = 1; -#line 107 "src/spss/readstat_sav_parse_mr_name.rl" - - -readstat_error_t extract_mr_data(const char *line, mr_set_t *result) { - readstat_error_t retval = READSTAT_OK; - - // Variables needed for Ragel operation - int cs = 0; - char *p = (char *)line; - char *start = p; - char *pe = p + strlen(p) + 1; - - // Variables needed for passing Ragel intermediate results - char mr_type = '\0'; - int mr_counted_value = -1; - int mr_subvar_count = 0; - char **mr_subvariables = NULL; - char *mr_name = NULL; - char *mr_label = NULL; - - // Execute Ragel finite state machine (FSM) - -#line 89 "src/spss/readstat_sav_parse_mr_name.c" +#line 121 "./readstat_sav_parse_mr_name.rl" + + +readstat_error_t extract_mr_data(const char *line, mr_set_t *result, sav_ctx_t *ctx) { + readstat_error_t retval = READSTAT_OK; + + // Variables needed for Ragel operation + int cs = 0; + char *p = (char *)line; + char *start = p; + char *pe = p + strlen(p) + 1; + + // Variables needed for passing Ragel intermediate results + char mr_type = '\0'; + int mr_counted_value = -1; + int mr_subvar_count = 0; + char **mr_subvariables = NULL; + char *mr_name = NULL; + char *mr_label = NULL; + + // Execute Ragel finite state machine (FSM) + +#line 85 "./readstat_sav_parse_mr_name.c" { - cs = (int)mr_extractor_start; + cs = mr_extractor_start; } - -#line 127 "src/spss/readstat_sav_parse_mr_name.rl" - -#line 94 "src/spss/readstat_sav_parse_mr_name.c" +#line 142 "./readstat_sav_parse_mr_name.rl" + +#line 88 "./readstat_sav_parse_mr_name.c" { - int _klen; - unsigned int _trans = 0; - const char * _keys; - const signed char * _acts; - unsigned int _nacts; - _resume: {} - if ( p == pe ) - goto _out; - _keys = ( _mr_extractor_trans_keys + (_mr_extractor_key_offsets[cs])); - _trans = (unsigned int)_mr_extractor_index_offsets[cs]; - - _klen = (int)_mr_extractor_single_lengths[cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_upper = _keys + _klen - 1; - const char *_mid; - while ( 1 ) { - if ( _upper < _lower ) { - _keys += _klen; - _trans += (unsigned int)_klen; - break; - } - - _mid = _lower + ((_upper-_lower) >> 1); - if ( ( (*( p))) < (*( _mid)) ) - _upper = _mid - 1; - else if ( ( (*( p))) > (*( _mid)) ) - _lower = _mid + 1; - else { - _trans += (unsigned int)(_mid - _keys); - goto _match; - } - } - } - - _klen = (int)_mr_extractor_range_lengths[cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_upper = _keys + (_klen<<1) - 2; - const char *_mid; - while ( 1 ) { - if ( _upper < _lower ) { - _trans += (unsigned int)_klen; - break; - } - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( ( (*( p))) < (*( _mid)) ) - _upper = _mid - 2; - else if ( ( (*( p))) > (*( _mid + 1)) ) - _lower = _mid + 2; - else { - _trans += (unsigned int)((_mid - _keys)>>1); - break; - } + int _klen; + unsigned int _trans; + const char *_acts; + unsigned int _nacts; + const char *_keys; + + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _mr_extractor_trans_keys + _mr_extractor_key_offsets[cs]; + _trans = _mr_extractor_index_offsets[cs]; + + _klen = _mr_extractor_single_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; } } - - _match: {} - cs = (int)_mr_extractor_cond_targs[_trans]; - - if ( _mr_extractor_cond_actions[_trans] != 0 ) { - - _acts = ( _mr_extractor_actions + (_mr_extractor_cond_actions[_trans])); - _nacts = (unsigned int)(*( _acts)); - _acts += 1; - while ( _nacts > 0 ) { - switch ( (*( _acts)) ) - { - case 0: { - { -#line 10 "src/spss/readstat_sav_parse_mr_name.rl" - - mr_name = (char *)readstat_malloc(p - start + 1); - if (mr_name == NULL) { - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - memcpy(mr_name, start, p - start); - mr_name[p - start] = '\0'; - } - -#line 177 "src/spss/readstat_sav_parse_mr_name.c" - - break; - } - case 1: { - { -#line 20 "src/spss/readstat_sav_parse_mr_name.rl" - - mr_type = *p; - start = p + 1; - } - -#line 188 "src/spss/readstat_sav_parse_mr_name.c" - - break; - } - case 2: { - { -#line 25 "src/spss/readstat_sav_parse_mr_name.rl" - - int n_cv_digs = p - start; - char *n_dig_str = (char *)readstat_malloc(n_cv_digs + 1); - if (n_dig_str == NULL) { - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - memcpy(n_dig_str, start, n_cv_digs); - n_dig_str[n_cv_digs] = '\0'; - int n_digs = strtol(n_dig_str, NULL, 10); - free(n_dig_str); - if (n_digs != 0) { - char *cv = (char *)readstat_malloc(n_digs + 1); - if (cv == NULL) { - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - memcpy(cv, p + 1, n_digs); - cv[n_digs] = '\0'; - mr_counted_value = strtol(cv, NULL, 10); - free(cv); - p = p + 1 + n_digs; - start = p + 1; - } - else { - mr_counted_value = -1; - } - } - -#line 223 "src/spss/readstat_sav_parse_mr_name.c" - - break; - } - case 3: { - { -#line 54 "src/spss/readstat_sav_parse_mr_name.rl" - - char *lbl_len_str = (char *)readstat_malloc(p - start + 1); - if (lbl_len_str == NULL) { - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - memcpy(lbl_len_str, start, p - start); - lbl_len_str[p - start] = '\0'; - int len = strtol(lbl_len_str, NULL, 10); - free(lbl_len_str); - mr_label = (char *)readstat_malloc(len + 1); - if (mr_label == NULL) { - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - memcpy(mr_label, p + 1, len); - mr_label[len] = '\0'; - p = p + 1 + len; - start = p + 1; - } - -#line 250 "src/spss/readstat_sav_parse_mr_name.c" - - break; - } - case 4: { - { -#line 75 "src/spss/readstat_sav_parse_mr_name.rl" - - int len = p - start; - char *subvar = (char *)readstat_malloc(len + 1); - if (subvar == NULL) { - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - memcpy(subvar, start, len); - subvar[len] = '\0'; - start = p + 1; - char **new_subvariables = readstat_realloc(mr_subvariables, sizeof(char *) * (mr_subvar_count + 1)); - if (new_subvariables == NULL) { - free(subvar); - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - mr_subvariables = new_subvariables; - mr_subvariables[mr_subvar_count++] = subvar; - } - -#line 276 "src/spss/readstat_sav_parse_mr_name.c" - - break; - } - } - _nacts -= 1; - _acts += 1; + _keys += _klen; + _trans += _klen; + } + + _klen = _mr_extractor_range_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; } - } - - if ( cs != 0 ) { - p += 1; - goto _resume; - } - _out: {} + _trans += _klen; } - -#line 128 "src/spss/readstat_sav_parse_mr_name.rl" - - - // Check if FSM finished successfully - if (cs < -#line 296 "src/spss/readstat_sav_parse_mr_name.c" -9 -#line 131 "src/spss/readstat_sav_parse_mr_name.rl" - || p != pe) { - retval = READSTAT_ERROR_BAD_MR_STRING; - goto cleanup; + +_match: + _trans = _mr_extractor_indicies[_trans]; + cs = _mr_extractor_trans_targs[_trans]; + + if ( _mr_extractor_trans_actions[_trans] == 0 ) + goto _again; + + _acts = _mr_extractor_actions + _mr_extractor_trans_actions[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + { + switch ( *_acts++ ) + { + case 0: +#line 13 "./readstat_sav_parse_mr_name.rl" + { + size_t src_len = p - start; + size_t dst_len = 4 * src_len + 1; // UTF-8 expansion: up to 4 bytes per char + mr_name = (char *)readstat_malloc(dst_len); + if (mr_name == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + retval = readstat_convert(mr_name, dst_len, start, src_len, ctx->converter); + if (retval != READSTAT_OK) { + goto cleanup; + } + } + break; + case 1: +#line 27 "./readstat_sav_parse_mr_name.rl" + { + mr_type = *p; + start = p + 1; + } + break; + case 2: +#line 32 "./readstat_sav_parse_mr_name.rl" + { + int n_cv_digs = p - start; + char *n_dig_str = (char *)readstat_malloc(n_cv_digs + 1); + if (n_dig_str == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(n_dig_str, start, n_cv_digs); + n_dig_str[n_cv_digs] = '\0'; + int n_digs = strtol(n_dig_str, NULL, 10); + free(n_dig_str); + if (n_digs != 0) { + char *cv = (char *)readstat_malloc(n_digs + 1); + if (cv == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(cv, p + 1, n_digs); + cv[n_digs] = '\0'; + mr_counted_value = strtol(cv, NULL, 10); + free(cv); + p = p + 1 + n_digs; + start = p + 1; + } + else { + mr_counted_value = -1; + } + } + break; + case 3: +#line 61 "./readstat_sav_parse_mr_name.rl" + { + char *lbl_len_str = (char *)readstat_malloc(p - start + 1); + if (lbl_len_str == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(lbl_len_str, start, p - start); + lbl_len_str[p - start] = '\0'; + int len = strtol(lbl_len_str, NULL, 10); + free(lbl_len_str); + size_t dst_len = 4 * len + 1; // UTF-8 expansion: up to 4 bytes per char + mr_label = (char *)readstat_malloc(dst_len); + if (mr_label == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + retval = readstat_convert(mr_label, dst_len, p + 1, len, ctx->converter); + if (retval != READSTAT_OK) { + goto cleanup; + } + p = p + 1 + len; + start = p + 1; + } + break; + case 4: +#line 85 "./readstat_sav_parse_mr_name.rl" + { + size_t src_len = p - start; + size_t dst_len = 4 * src_len + 1; // UTF-8 expansion: up to 4 bytes per char + char *subvar = (char *)readstat_malloc(dst_len); + if (subvar == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + retval = readstat_convert(subvar, dst_len, start, src_len, ctx->converter); + if (retval != READSTAT_OK) { + free(subvar); + goto cleanup; + } + start = p + 1; + char **new_subvariables = readstat_realloc(mr_subvariables, sizeof(char *) * (mr_subvar_count + 1)); + if (new_subvariables == NULL) { + free(subvar); + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + mr_subvariables = new_subvariables; + mr_subvariables[mr_subvar_count++] = subvar; + } + break; +#line 262 "./readstat_sav_parse_mr_name.c" + } } - - (void)mr_extractor_en_main; - - // Assign parsed values to output parameter - result->name = mr_name; - result->label = mr_label; - result->type = mr_type; - result->counted_value = mr_counted_value; - result->subvariables = mr_subvariables; - result->num_subvars = mr_subvar_count; - if (result->type == 'D') { - result->is_dichotomy = 1; + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + _out: {} } - - cleanup: - if (retval != READSTAT_OK) { - if (mr_subvariables != NULL) { - for (int i = 0; i < mr_subvar_count; i++) { - if (mr_subvariables[i] != NULL) free(mr_subvariables[i]); - } - free(mr_subvariables); - } - if (mr_name != NULL) free(mr_name); - if (mr_label != NULL) free(mr_label); - } - return retval; + +#line 143 "./readstat_sav_parse_mr_name.rl" + + // Check if FSM finished successfully + if (cs < 9 || p != pe) { + retval = READSTAT_ERROR_BAD_MR_STRING; + goto cleanup; + } + + (void)mr_extractor_en_main; + + // Assign parsed values to output parameter + result->name = mr_name; + result->label = mr_label; + result->type = mr_type; + result->counted_value = mr_counted_value; + result->subvariables = mr_subvariables; + result->num_subvars = mr_subvar_count; + if (result->type == 'D') { + result->is_dichotomy = 1; + } + +cleanup: + if (retval != READSTAT_OK) { + if (mr_subvariables != NULL) { + for (int i = 0; i < mr_subvar_count; i++) { + if (mr_subvariables[i] != NULL) free(mr_subvariables[i]); + } + free(mr_subvariables); + } + if (mr_name != NULL) free(mr_name); + if (mr_label != NULL) free(mr_label); + } + return retval; } -readstat_error_t parse_mr_line(const char *line, mr_set_t *result) { - *result = (mr_set_t){0}; - return extract_mr_data(line, result); +readstat_error_t parse_mr_line(const char *line, mr_set_t *result, sav_ctx_t *ctx) { + *result = (mr_set_t){0}; + return extract_mr_data(line, result, ctx); } -#line 335 "src/spss/readstat_sav_parse_mr_name.c" -static const signed char _mr_parser_actions[] = { - 0, 1, 0, 0 +#line 313 "./readstat_sav_parse_mr_name.c" +static const char _mr_parser_actions[] = { + 0, 1, 0 }; -static const signed char _mr_parser_key_offsets[] = { - 0, 0, 1, 2, 4, 0 +static const char _mr_parser_key_offsets[] = { + 0, 0, 1, 2, 4 }; static const char _mr_parser_trans_keys[] = { 36, 10, 0, 10, 10, 0 }; -static const signed char _mr_parser_single_lengths[] = { - 0, 1, 1, 2, 1, 0 +static const char _mr_parser_single_lengths[] = { + 0, 1, 1, 2, 1 }; -static const signed char _mr_parser_range_lengths[] = { - 0, 0, 0, 0, 0, 0 +static const char _mr_parser_range_lengths[] = { + 0, 0, 0, 0, 0 }; -static const signed char _mr_parser_index_offsets[] = { - 0, 0, 2, 4, 7, 0 +static const char _mr_parser_index_offsets[] = { + 0, 0, 2, 4, 7 }; -static const signed char _mr_parser_cond_targs[] = { - 2, 0, 3, 2, 4, 3, 2, 3, - 2, 0, 1, 2, 3, 4, 0 +static const char _mr_parser_indicies[] = { + 0, 1, 2, 0, 3, 2, 0, 2, + 0, 0 }; -static const signed char _mr_parser_cond_actions[] = { - 0, 0, 1, 0, 0, 1, 0, 1, - 0, 0, 0, 0, 0, 0, 0 +static const char _mr_parser_trans_targs[] = { + 2, 0, 3, 4 +}; + +static const char _mr_parser_trans_actions[] = { + 0, 0, 1, 0 }; static const int mr_parser_start = 1; @@ -390,157 +373,148 @@ static const int mr_parser_start = 1; static const int mr_parser_en_main = 1; -#line 202 "src/spss/readstat_sav_parse_mr_name.rl" +#line 216 "./readstat_sav_parse_mr_name.rl" -readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines) { - readstat_error_t retval = READSTAT_OK; - int cs = 0; - char *p = (char *)line; - char *start = p; - char *pe = p + strlen(p) + 1; - *mr_sets = NULL; - *n_mr_lines = 0; - +readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines, sav_ctx_t *ctx) { + readstat_error_t retval = READSTAT_OK; + int cs = 0; + char *p = (char *)line; + char *start = p; + char *pe = p + strlen(p) + 1; + *mr_sets = NULL; + *n_mr_lines = 0; -#line 385 "src/spss/readstat_sav_parse_mr_name.c" + +#line 365 "./readstat_sav_parse_mr_name.c" { - cs = (int)mr_parser_start; + cs = mr_parser_start; } - -#line 213 "src/spss/readstat_sav_parse_mr_name.rl" - -#line 390 "src/spss/readstat_sav_parse_mr_name.c" +#line 228 "./readstat_sav_parse_mr_name.rl" + +#line 368 "./readstat_sav_parse_mr_name.c" { - int _klen; - unsigned int _trans = 0; - const char * _keys; - const signed char * _acts; - unsigned int _nacts; - _resume: {} - if ( p == pe ) - goto _out; - _keys = ( _mr_parser_trans_keys + (_mr_parser_key_offsets[cs])); - _trans = (unsigned int)_mr_parser_index_offsets[cs]; - - _klen = (int)_mr_parser_single_lengths[cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_upper = _keys + _klen - 1; - const char *_mid; - while ( 1 ) { - if ( _upper < _lower ) { - _keys += _klen; - _trans += (unsigned int)_klen; - break; - } - - _mid = _lower + ((_upper-_lower) >> 1); - if ( ( (*( p))) < (*( _mid)) ) - _upper = _mid - 1; - else if ( ( (*( p))) > (*( _mid)) ) - _lower = _mid + 1; - else { - _trans += (unsigned int)(_mid - _keys); - goto _match; - } + int _klen; + unsigned int _trans; + const char *_acts; + unsigned int _nacts; + const char *_keys; + + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _mr_parser_trans_keys + _mr_parser_key_offsets[cs]; + _trans = _mr_parser_index_offsets[cs]; + + _klen = _mr_parser_single_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; } } - - _klen = (int)_mr_parser_range_lengths[cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_upper = _keys + (_klen<<1) - 2; - const char *_mid; - while ( 1 ) { - if ( _upper < _lower ) { - _trans += (unsigned int)_klen; - break; - } - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( ( (*( p))) < (*( _mid)) ) - _upper = _mid - 2; - else if ( ( (*( p))) > (*( _mid + 1)) ) - _lower = _mid + 2; - else { - _trans += (unsigned int)((_mid - _keys)>>1); - break; - } - } - } - - _match: {} - cs = (int)_mr_parser_cond_targs[_trans]; - - if ( _mr_parser_cond_actions[_trans] != 0 ) { - - _acts = ( _mr_parser_actions + (_mr_parser_cond_actions[_trans])); - _nacts = (unsigned int)(*( _acts)); - _acts += 1; - while ( _nacts > 0 ) { - switch ( (*( _acts)) ) - { - case 0: { - { -#line 172 "src/spss/readstat_sav_parse_mr_name.rl" - - char *mln = (char *)readstat_malloc(p - start); - if (mln == NULL) { - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - memcpy(mln, start + 1, p - start); - mln[p - start - 1] = '\0'; - mr_set_t *new_mr_sets = readstat_realloc(*mr_sets, ((*n_mr_lines) + 1) * sizeof(mr_set_t)); - if (new_mr_sets == NULL) { - free(mln); - retval = READSTAT_ERROR_MALLOC; - goto cleanup; - } - *mr_sets = new_mr_sets; - retval = parse_mr_line(mln, &(*mr_sets)[*n_mr_lines]); - free(mln); - if (retval != READSTAT_OK) { - goto cleanup; - } - (*n_mr_lines)++; - start = p + 1; - } - -#line 487 "src/spss/readstat_sav_parse_mr_name.c" - - break; - } - } - _nacts -= 1; - _acts += 1; + _keys += _klen; + _trans += _klen; + } + + _klen = _mr_parser_range_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; } - } - - if ( cs != 0 ) { - p += 1; - goto _resume; + _trans += _klen; + } + +_match: + _trans = _mr_parser_indicies[_trans]; + cs = _mr_parser_trans_targs[_trans]; + + if ( _mr_parser_trans_actions[_trans] == 0 ) + goto _again; + + _acts = _mr_parser_actions + _mr_parser_trans_actions[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + { + switch ( *_acts++ ) + { + case 0: +#line 186 "./readstat_sav_parse_mr_name.rl" + { + char *mln = (char *)readstat_malloc(p - start); + if (mln == NULL) { + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + memcpy(mln, start + 1, p - start); + mln[p - start - 1] = '\0'; + mr_set_t *new_mr_sets = readstat_realloc(*mr_sets, ((*n_mr_lines) + 1) * sizeof(mr_set_t)); + if (new_mr_sets == NULL) { + free(mln); + retval = READSTAT_ERROR_MALLOC; + goto cleanup; + } + *mr_sets = new_mr_sets; + retval = parse_mr_line(mln, &(*mr_sets)[*n_mr_lines], ctx); + free(mln); + if (retval != READSTAT_OK) { + goto cleanup; + } + (*n_mr_lines)++; + start = p + 1; + } + break; +#line 466 "./readstat_sav_parse_mr_name.c" } - _out: {} } - -#line 214 "src/spss/readstat_sav_parse_mr_name.rl" - - - if (cs < -#line 506 "src/spss/readstat_sav_parse_mr_name.c" -4 -#line 216 "src/spss/readstat_sav_parse_mr_name.rl" - || p != pe) { - retval = READSTAT_ERROR_BAD_MR_STRING; - goto cleanup; + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + _out: {} } - - (void)mr_parser_en_main; - - cleanup: - return retval; + +#line 229 "./readstat_sav_parse_mr_name.rl" + + if (cs < 4 || p != pe) { + retval = READSTAT_ERROR_BAD_MR_STRING; + goto cleanup; + } + + (void)mr_parser_en_main; + +cleanup: + return retval; } diff --git a/src/spss/readstat_sav_parse_mr_name.h b/src/spss/readstat_sav_parse_mr_name.h index 3975216..6dce3e0 100644 --- a/src/spss/readstat_sav_parse_mr_name.h +++ b/src/spss/readstat_sav_parse_mr_name.h @@ -4,6 +4,6 @@ #include "../readstat.h" #include "../readstat_malloc.h" -readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines); +readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines, sav_ctx_t *ctx); #endif // READSTAT_PARSE_MR_NAME_H diff --git a/src/spss/readstat_sav_parse_mr_name.rl b/src/spss/readstat_sav_parse_mr_name.rl index 817638b..8b609e2 100644 --- a/src/spss/readstat_sav_parse_mr_name.rl +++ b/src/spss/readstat_sav_parse_mr_name.rl @@ -3,18 +3,25 @@ #include #include "../readstat.h" #include "../readstat_malloc.h" +#include "../readstat_iconv.h" +#include "../readstat_convert.h" +#include "readstat_sav.h" %%{ machine mr_extractor; action extract_mr_name { - mr_name = (char *)readstat_malloc(p - start + 1); + size_t src_len = p - start; + size_t dst_len = 4 * src_len + 1; // UTF-8 expansion: up to 4 bytes per char + mr_name = (char *)readstat_malloc(dst_len); if (mr_name == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } - memcpy(mr_name, start, p - start); - mr_name[p - start] = '\0'; + retval = readstat_convert(mr_name, dst_len, start, src_len, ctx->converter); + if (retval != READSTAT_OK) { + goto cleanup; + } } action extract_mr_type { @@ -61,27 +68,34 @@ lbl_len_str[p - start] = '\0'; int len = strtol(lbl_len_str, NULL, 10); free(lbl_len_str); - mr_label = (char *)readstat_malloc(len + 1); + size_t dst_len = 4 * len + 1; // UTF-8 expansion: up to 4 bytes per char + mr_label = (char *)readstat_malloc(dst_len); if (mr_label == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } - memcpy(mr_label, p + 1, len); - mr_label[len] = '\0'; + retval = readstat_convert(mr_label, dst_len, p + 1, len, ctx->converter); + if (retval != READSTAT_OK) { + goto cleanup; + } p = p + 1 + len; start = p + 1; } action extract_subvar { - int len = p - start; - char *subvar = (char *)readstat_malloc(len + 1); + size_t src_len = p - start; + size_t dst_len = 4 * src_len + 1; // UTF-8 expansion: up to 4 bytes per char + char *subvar = (char *)readstat_malloc(dst_len); if (subvar == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } - memcpy(subvar, start, len); - subvar[len] = '\0'; - start = p + 1; + retval = readstat_convert(subvar, dst_len, start, src_len, ctx->converter); + if (retval != READSTAT_OK) { + free(subvar); + goto cleanup; + } + start = p + 1; char **new_subvariables = readstat_realloc(mr_subvariables, sizeof(char *) * (mr_subvar_count + 1)); if (new_subvariables == NULL) { free(subvar); @@ -92,7 +106,7 @@ mr_subvariables[mr_subvar_count++] = subvar; } - nc = (alnum | '_' | '.' ); # name character (including dots) + nc = ([^ =]); # name character (all characters except space and equals) name = nc+ '=' > extract_mr_name; type = ('C' | 'D'){1} > extract_mr_type; counted_value = digit* ' ' > extract_counted_value; @@ -106,7 +120,7 @@ write data nofinal noerror; }%% -readstat_error_t extract_mr_data(const char *line, mr_set_t *result) { +readstat_error_t extract_mr_data(const char *line, mr_set_t *result, sav_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; // Variables needed for Ragel operation @@ -161,9 +175,9 @@ cleanup: } -readstat_error_t parse_mr_line(const char *line, mr_set_t *result) { +readstat_error_t parse_mr_line(const char *line, mr_set_t *result, sav_ctx_t *ctx) { *result = (mr_set_t){0}; - return extract_mr_data(line, result); + return extract_mr_data(line, result, ctx); } %%{ @@ -184,7 +198,7 @@ readstat_error_t parse_mr_line(const char *line, mr_set_t *result) { goto cleanup; } *mr_sets = new_mr_sets; - retval = parse_mr_line(mln, &(*mr_sets)[*n_mr_lines]); + retval = parse_mr_line(mln, &(*mr_sets)[*n_mr_lines], ctx); free(mln); if (retval != READSTAT_OK) { goto cleanup; @@ -201,7 +215,7 @@ readstat_error_t parse_mr_line(const char *line, mr_set_t *result) { write data nofinal noerror; }%% -readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines) { +readstat_error_t parse_mr_string(const char *line, mr_set_t **mr_sets, size_t *n_mr_lines, sav_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int cs = 0; char *p = (char *)line; diff --git a/src/spss/readstat_sav_read.c b/src/spss/readstat_sav_read.c index 731d8ad..5810f1f 100644 --- a/src/spss/readstat_sav_read.c +++ b/src/spss/readstat_sav_read.c @@ -167,7 +167,7 @@ static readstat_error_t sav_read_multiple_response_sets(size_t data_len, sav_ctx goto cleanup; } - retval = parse_mr_string(mr_string, &ctx->mr_sets, &ctx->multiple_response_sets_length); + retval = parse_mr_string(mr_string, &ctx->mr_sets, &ctx->multiple_response_sets_length, ctx); cleanup: free(mr_string);