From 89629de0013c9969736b526cadb7dd334905687c Mon Sep 17 00:00:00 2001 From: cijiugechu Date: Fri, 27 Feb 2026 15:07:21 +0800 Subject: [PATCH 1/6] feat: add optional parse error locations across C and Rust APIs Expose location-aware error reporting through new C and Rust entry points while preserving existing parse APIs. Also add CI coverage for error-location builds/features and tighten lexer EOF error reporting so location assertions are reliable. --- .github/workflows/ci.yml | 18 ++++ README.md | 34 +++++++- cmake/lexer-flags.cmake | 1 + include/merve/parser.h | 26 ++++++ include/merve_c.h | 33 +++++++ rust/Cargo.lock | 2 +- rust/Cargo.toml | 2 + rust/README.md | 29 +++++++ rust/build.rs | 4 + rust/deps/merve.cpp | 142 ++++++++++++++++++++++++++++-- rust/deps/merve.h | 26 ++++++ rust/deps/merve_c.h | 33 +++++++ rust/src/ffi.rs | 14 +++ rust/src/lib.rs | 172 +++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 4 + src/merve_c.cpp | 31 +++++++ src/parser.cpp | 78 +++++++++++++++-- tests/c_api_compile_test.c | 10 +++ tests/c_api_tests.cpp | 45 ++++++++++ tests/real_world_tests.cpp | 35 ++++++++ 20 files changed, 727 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b13885b..c6da1a3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,6 +91,12 @@ jobs: cmake_gen: Ninja cmake_flags: "-DMERVE_USE_SIMDUTF=ON -DMERVE_SANITIZE=ON" name_suffix: " (simdutf, ASAN)" + # Ubuntu with error location tracking + - os: ubuntu-22.04 + cxx: g++-12 + cmake_gen: Ninja + cmake_flags: "-DMERVE_ENABLE_ERROR_LOCATION=ON" + name_suffix: " (error-location)" name: ${{ matrix.os }} ${{ matrix.cxx || 'MSVC' }}${{ matrix.name_suffix || '' }} runs-on: ${{ matrix.os }} @@ -164,6 +170,10 @@ jobs: working-directory: rust run: cargo clippy -- -D warnings + - name: Clippy (error-location) + working-directory: rust + run: cargo clippy --features error-location -- -D warnings + - name: Test working-directory: rust run: 
cargo test @@ -171,3 +181,11 @@ jobs: - name: Test (no default features) working-directory: rust run: cargo test --no-default-features + + - name: Test (error-location) + working-directory: rust + run: cargo test --features error-location + + - name: Test (no default features + error-location) + working-directory: rust + run: cargo test --no-default-features --features error-location diff --git a/README.md b/README.md index 5d8ac88..524c380 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,25 @@ const std::optional& get_last_error(); Returns the last parse error, if any. +### `lexer::get_last_error_location` + +```cpp +const std::optional& get_last_error_location(); +``` + +Returns the location of the last parse error, if available. Location tracking +is enabled when built with `MERVE_ENABLE_ERROR_LOCATION`. + +### `lexer::error_location` + +```cpp +struct error_location { + uint32_t line; // 1-based + uint32_t column; // 1-based + size_t offset; // 0-based byte offset +}; +``` + ## C API merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or any language that can call C functions. The C API is compiled into the merve library alongside the C++ implementation. 
@@ -141,11 +160,14 @@ merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or a ```c #include "merve_c.h" #include +#include int main(void) { const char* source = "exports.foo = 1;\nexports.bar = 2;\n"; - merve_analysis result = merve_parse_commonjs(source, strlen(source)); + merve_error_loc err_loc = {0, 0, 0}; + merve_analysis result = merve_parse_commonjs_ex( + source, strlen(source), &err_loc); if (merve_is_valid(result)) { size_t count = merve_get_exports_count(result); @@ -157,6 +179,10 @@ int main(void) { } } else { printf("Parse error: %d\n", merve_get_last_error()); + if (err_loc.line != 0) { + printf(" at line %u, column %u (byte offset %zu)\n", + err_loc.line, err_loc.column, err_loc.offset); + } } merve_free(result); @@ -180,12 +206,14 @@ Found 2 exports: | `merve_string` | Non-owning string reference (`data` + `length`). Not null-terminated. | | `merve_analysis` | Opaque handle to a parse result. Must be freed with `merve_free()`. | | `merve_version_components` | Struct with `major`, `minor`, `revision` fields. | +| `merve_error_loc` | Error location (`line`, `column`, `offset`). `{0,0,0}` means unavailable. | #### Functions | Function | Description | |----------|-------------| | `merve_parse_commonjs(input, length)` | Parse CommonJS source. Returns a handle (NULL only on OOM). | +| `merve_parse_commonjs_ex(input, length, out_err)` | Parse CommonJS source and optionally fill error location. | | `merve_is_valid(result)` | Check if parsing succeeded. NULL-safe. | | `merve_free(result)` | Free a parse result. NULL-safe. | | `merve_get_exports_count(result)` | Number of named exports found. | @@ -198,6 +226,9 @@ Found 2 exports: | `merve_get_version()` | Version string (e.g. `"1.0.1"`). | | `merve_get_version_components()` | Version as `{major, minor, revision}`. | +Build with `-DMERVE_ENABLE_ERROR_LOCATION=ON` to enable non-zero locations +from `merve_parse_commonjs_ex`. 
+ #### Error Constants | Constant | Value | Description | @@ -344,6 +375,7 @@ ctest --test-dir build | `MERVE_TESTING` | `ON` | Build test suite | | `MERVE_BENCHMARKS` | `OFF` | Build benchmarks | | `MERVE_USE_SIMDUTF` | `OFF` | Use simdutf for optimized string operations | +| `MERVE_ENABLE_ERROR_LOCATION` | `OFF` | Track parse error source locations | | `MERVE_SANITIZE` | `OFF` | Enable address sanitizer | ### Building with simdutf diff --git a/cmake/lexer-flags.cmake b/cmake/lexer-flags.cmake index 9ad92c8..d1aafc7 100644 --- a/cmake/lexer-flags.cmake +++ b/cmake/lexer-flags.cmake @@ -1,6 +1,7 @@ option(MERVE_LOGGING "verbose output (useful for debugging)" OFF) option(MERVE_SANITIZE "Sanitize addresses" OFF) option(MERVE_USE_SIMDUTF "Use simdutf for optimized string operations" OFF) +option(MERVE_ENABLE_ERROR_LOCATION "Track parse error source locations" OFF) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") option(MERVE_SANITIZE_BOUNDS_STRICT "Sanitize bounds (strict): only for GCC" OFF) endif() diff --git a/include/merve/parser.h b/include/merve/parser.h index 8d1ff42..2def0be 100644 --- a/include/merve/parser.h +++ b/include/merve/parser.h @@ -3,6 +3,7 @@ #include "merve/version.h" +#include #include #include #include @@ -37,6 +38,18 @@ enum lexer_error { TEMPLATE_NEST_OVERFLOW, ///< Template literal nesting too deep }; +/** + * @brief Source location information for a parse error. + * + * - line and column are 1-based. + * - offset is 0-based and measured in bytes from the start of input. + */ +struct error_location { + uint32_t line; + uint32_t column; + size_t offset; +}; + /** * @brief Type alias for export names. * @@ -146,6 +159,19 @@ std::optional parse_commonjs(std::string_view file_contents); */ const std::optional& get_last_error(); +/** + * @brief Get the location of the last failed parse operation. + * + * @return const std::optional& The last error location, or + * std::nullopt if unavailable. 
+ * + * @note This is thread-local state and may be overwritten by subsequent calls + * to parse_commonjs() on the same thread. + * @note Location tracking is enabled when built with + * MERVE_ENABLE_ERROR_LOCATION. + */ +const std::optional& get_last_error_location(); + } // namespace lexer #endif // MERVE_PARSER_H diff --git a/include/merve_c.h b/include/merve_c.h index af4a9d7..c85aeea 100644 --- a/include/merve_c.h +++ b/include/merve_c.h @@ -39,6 +39,20 @@ typedef struct { int revision; } merve_version_components; +/** + * @brief Source location for a parse error. + * + * - line and column are 1-based. + * - offset is 0-based and measured in bytes from the start of input. + * + * A zeroed location (`{0, 0, 0}`) means the location is unavailable. + */ +typedef struct { + uint32_t line; + uint32_t column; + size_t offset; +} merve_error_loc; + /* Error codes corresponding to lexer::lexer_error values. */ #define MERVE_ERROR_TODO 0 #define MERVE_ERROR_UNEXPECTED_PAREN 1 @@ -74,6 +88,25 @@ extern "C" { */ merve_analysis merve_parse_commonjs(const char* input, size_t length); +/** + * Parse CommonJS source code and optionally return error location. + * + * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always + * written: + * - On success: set to {0, 0, 0}. + * - On parse failure with known location: set to that location. + * - On parse failure without available location: set to {0, 0, 0}. + * + * @param input Pointer to the JavaScript source (need not be + * null-terminated). NULL is treated as an empty string. + * @param length Length of the input in bytes. + * @param out_err Optional output pointer for parse error location. + * @return A handle to the parse result, or NULL on out-of-memory. + * Use merve_is_valid() to check if parsing succeeded. + */ +merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, + merve_error_loc* out_err); + /** * Check whether the parse result is valid (parsing succeeded). 
* diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 6f19756..f4781cb 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -77,7 +77,7 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "merve" -version = "1.1.2" +version = "1.1.3" dependencies = [ "cc", "link_args", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 41a7d78..91140e6 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -26,6 +26,8 @@ default = ["std"] libcpp = [] # enable allocations std = [] +# enable parse error location support +error-location = [] [package.metadata.docs.rs] rustdoc-args = ["--cfg", "docsrs"] diff --git a/rust/README.md b/rust/README.md index e41045e..1038219 100644 --- a/rust/README.md +++ b/rust/README.md @@ -46,6 +46,13 @@ Requires `libc++` to be installed: merve = { version = "...", features = ["libcpp"] } ``` +**error-location**: Enables location-aware parse errors via +`parse_commonjs_with_location`. + +```toml +merve = { version = "...", features = ["error-location"] } +``` + ## API ### `parse_commonjs` @@ -58,6 +65,17 @@ Parse CommonJS source code and extract export information. The returned `Analysis` borrows from `source` because export names may point directly into the source buffer (zero-copy). +### `parse_commonjs_with_location` (`error-location` feature) + +```rust +pub fn parse_commonjs_with_location( + source: &str, +) -> Result, LocatedLexerError> +``` + +Like `parse_commonjs`, but returns a `LocatedLexerError` that includes +`kind: LexerError` plus optional location (`line`, `column`, `offset`). + ### `Analysis<'a>` | Method | Returns | Description | @@ -100,6 +118,17 @@ Returned when the input contains ESM syntax or malformed constructs: `LexerError` implements `Display` and, with the `std` feature, `std::error::Error`. 
+### `LocatedLexerError` (`error-location` feature) + +```rust +pub struct LocatedLexerError { + pub kind: LexerError, + pub location: Option, +} +``` + +`ErrorLocation` uses 1-based `line`/`column` and 0-based byte `offset`. + ### Versioning helpers ```rust diff --git a/rust/build.rs b/rust/build.rs index 74db0a9..48d5402 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -219,6 +219,10 @@ fn main() { build.include(&deps); build.cpp(true).std("c++20").warnings(false); + if env::var_os("CARGO_FEATURE_ERROR_LOCATION").is_some() { + build.define("MERVE_ENABLE_ERROR_LOCATION", Some("1")); + } + // Target handling let target_str = env::var("TARGET").unwrap(); let target: Vec = target_str.split('-').map(|s| s.into()).collect(); diff --git a/rust/deps/merve.cpp b/rust/deps/merve.cpp index dbb39b3..c4ed4db 100644 --- a/rust/deps/merve.cpp +++ b/rust/deps/merve.cpp @@ -314,6 +314,43 @@ struct StarExportBinding { // Thread-local state for error tracking (safe for concurrent parse calls). thread_local std::optional last_error; +thread_local std::optional last_error_location; + +#ifdef MERVE_ENABLE_ERROR_LOCATION +static error_location makeErrorLocation(const char* source, const char* end, const char* at) { + const char* target = at; + if (target < source) target = source; + if (target > end) target = end; + + uint32_t line = 1; + uint32_t column = 1; + const char* cur = source; + + while (cur < target) { + const char ch = *cur++; + if (ch == '\n') { + line++; + column = 1; + continue; + } + if (ch == '\r') { + line++; + column = 1; + if (cur < target && *cur == '\n') { + cur++; + } + continue; + } + column++; + } + + error_location loc{}; + loc.line = line; + loc.column = column; + loc.offset = static_cast(target - source); + return loc; +} +#endif // Lexer state class class CJSLexer { @@ -334,6 +371,7 @@ class CJSLexer { std::array templateStack_; std::array openTokenPosStack_; + std::array openTokenTypeStack_; std::array openClassPosStack; std::array starExportStack_; 
StarExportBinding* starExportStack; @@ -485,9 +523,15 @@ class CJSLexer { } // Parsing utilities - void syntaxError(lexer_error code) { + void syntaxError(lexer_error code, const char* at = nullptr) { if (!last_error) { last_error = code; +#ifdef MERVE_ENABLE_ERROR_LOCATION + const char* error_pos = at ? at : pos; + last_error_location = makeErrorLocation(source, end, error_pos); +#else + (void)at; +#endif } pos = end + 1; } @@ -1490,6 +1534,7 @@ class CJSLexer { char ch = commentWhitespace(); switch (ch) { case '(': + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = startPos; return; case '.': @@ -1503,7 +1548,7 @@ class CJSLexer { // It's something like import.metaData, not import.meta return; } - syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT_META); + syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT_META, startPos); } return; default: @@ -1518,17 +1563,18 @@ class CJSLexer { pos--; return; } - syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT); + syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT, startPos); } } void throwIfExportStatement() { + const char* startPos = pos; pos += 6; const char* curPos = pos; char ch = commentWhitespace(); if (pos == curPos && !isPunctuator(ch)) return; - syntaxError(lexer_error::UNEXPECTED_ESM_EXPORT); + syntaxError(lexer_error::UNEXPECTED_ESM_EXPORT, startPos); } public: @@ -1537,7 +1583,7 @@ class CJSLexer { templateStackDepth(0), openTokenDepth(0), templateDepth(0), line(1), lastSlashWasDivision(false), nextBraceIsClass(false), - templateStack_{}, openTokenPosStack_{}, openClassPosStack{}, + templateStack_{}, openTokenPosStack_{}, openTokenTypeStack_{}, openClassPosStack{}, starExportStack_{}, starExportStack(nullptr), STAR_EXPORT_STACK_END(nullptr), exports(out_exports), re_exports(out_re_exports) {} @@ -1602,6 +1648,7 @@ class CJSLexer { pos += 23; if (*pos == '(') { pos++; + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; if 
(tryParseRequire(RequireType::Import) && keywordStart(startPos)) tryBacktrackAddStarExportBinding(startPos - 1); @@ -1611,6 +1658,7 @@ class CJSLexer { if (pos + 4 < end && matchesAt(pos, end, "Star")) pos += 4; if (*pos == '(') { + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; if (*(pos + 1) == 'r') { pos++; @@ -1645,6 +1693,7 @@ class CJSLexer { tryParseObjectDefineOrKeys(openTokenDepth == 0); break; case '(': + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; break; case ')': @@ -1657,6 +1706,7 @@ class CJSLexer { case '{': openClassPosStack[openTokenDepth] = nextBraceIsClass; nextBraceIsClass = false; + openTokenTypeStack_[openTokenDepth] = '{'; openTokenPosStack_[openTokenDepth++] = lastTokenPos; break; case '}': @@ -1719,6 +1769,19 @@ class CJSLexer { lastTokenPos = pos; } + if (!last_error) { + if (templateDepth != std::numeric_limits::max()) { + syntaxError(lexer_error::UNTERMINATED_TEMPLATE_STRING, end); + } else if (openTokenDepth != 0) { + const char open_ch = openTokenTypeStack_[openTokenDepth - 1]; + if (open_ch == '{') { + syntaxError(lexer_error::UNTERMINATED_BRACE, end); + } else { + syntaxError(lexer_error::UNTERMINATED_PAREN, end); + } + } + } + if (templateDepth != std::numeric_limits::max() || openTokenDepth || last_error) { return false; } @@ -1729,6 +1792,7 @@ class CJSLexer { std::optional parse_commonjs(std::string_view file_contents) { last_error.reset(); + last_error_location.reset(); lexer_analysis result; CJSLexer lexer(result.exports, result.re_exports); @@ -1744,6 +1808,10 @@ const std::optional& get_last_error() { return last_error; } +const std::optional& get_last_error_location() { + return last_error_location; +} + } // namespace lexer /* end file parser.cpp */ /* begin file merve_c.cpp */ @@ -1796,6 +1864,20 @@ typedef struct { int revision; } merve_version_components; +/** + * @brief Source location for a parse error. 
+ * + * - line and column are 1-based. + * - offset is 0-based and measured in bytes from the start of input. + * + * A zeroed location (`{0, 0, 0}`) means the location is unavailable. + */ +typedef struct { + uint32_t line; + uint32_t column; + size_t offset; +} merve_error_loc; + /* Error codes corresponding to lexer::lexer_error values. */ #define MERVE_ERROR_TODO 0 #define MERVE_ERROR_UNEXPECTED_PAREN 1 @@ -1831,6 +1913,25 @@ extern "C" { */ merve_analysis merve_parse_commonjs(const char* input, size_t length); +/** + * Parse CommonJS source code and optionally return error location. + * + * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always + * written: + * - On success: set to {0, 0, 0}. + * - On parse failure with known location: set to that location. + * - On parse failure without available location: set to {0, 0, 0}. + * + * @param input Pointer to the JavaScript source (need not be + * null-terminated). NULL is treated as an empty string. + * @param length Length of the input in bytes. + * @param out_err Optional output pointer for parse error location. + * @return A handle to the parse result, or NULL on out-of-memory. + * Use merve_is_valid() to check if parsing succeeded. + */ +merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, + merve_error_loc* out_err); + /** * Check whether the parse result is valid (parsing succeeded). 
* @@ -1941,9 +2042,31 @@ static merve_string merve_string_create(const char* data, size_t length) { return out; } +static void merve_error_loc_clear(merve_error_loc* out_err) { + if (!out_err) return; + out_err->line = 0; + out_err->column = 0; + out_err->offset = 0; +} + +static void merve_error_loc_set(merve_error_loc* out_err, + const lexer::error_location& loc) { + if (!out_err) return; + out_err->line = loc.line; + out_err->column = loc.column; + out_err->offset = loc.offset; +} + extern "C" { merve_analysis merve_parse_commonjs(const char* input, size_t length) { + return merve_parse_commonjs_ex(input, length, nullptr); +} + +merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, + merve_error_loc* out_err) { + merve_error_loc_clear(out_err); + merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); if (!impl) return nullptr; if (input != nullptr) { @@ -1951,6 +2074,15 @@ merve_analysis merve_parse_commonjs(const char* input, size_t length) { } else { impl->result = lexer::parse_commonjs(std::string_view("", 0)); } + + if (!impl->result.has_value() && out_err) { + const std::optional& err_loc = + lexer::get_last_error_location(); + if (err_loc.has_value()) { + merve_error_loc_set(out_err, err_loc.value()); + } + } + return static_cast(impl); } diff --git a/rust/deps/merve.h b/rust/deps/merve.h index d30dec2..2730841 100644 --- a/rust/deps/merve.h +++ b/rust/deps/merve.h @@ -29,6 +29,7 @@ enum { #endif // MERVE_VERSION_H /* end file merve/version.h */ +#include #include #include #include @@ -63,6 +64,18 @@ enum lexer_error { TEMPLATE_NEST_OVERFLOW, ///< Template literal nesting too deep }; +/** + * @brief Source location information for a parse error. + * + * - line and column are 1-based. + * - offset is 0-based and measured in bytes from the start of input. + */ +struct error_location { + uint32_t line; + uint32_t column; + size_t offset; +}; + /** * @brief Type alias for export names. 
* @@ -172,6 +185,19 @@ std::optional parse_commonjs(std::string_view file_contents); */ const std::optional& get_last_error(); +/** + * @brief Get the location of the last failed parse operation. + * + * @return const std::optional& The last error location, or + * std::nullopt if unavailable. + * + * @note This is thread-local state and may be overwritten by subsequent calls + * to parse_commonjs() on the same thread. + * @note Location tracking is enabled when built with + * MERVE_ENABLE_ERROR_LOCATION. + */ +const std::optional& get_last_error_location(); + } // namespace lexer #endif // MERVE_PARSER_H diff --git a/rust/deps/merve_c.h b/rust/deps/merve_c.h index af4a9d7..c85aeea 100644 --- a/rust/deps/merve_c.h +++ b/rust/deps/merve_c.h @@ -39,6 +39,20 @@ typedef struct { int revision; } merve_version_components; +/** + * @brief Source location for a parse error. + * + * - line and column are 1-based. + * - offset is 0-based and measured in bytes from the start of input. + * + * A zeroed location (`{0, 0, 0}`) means the location is unavailable. + */ +typedef struct { + uint32_t line; + uint32_t column; + size_t offset; +} merve_error_loc; + /* Error codes corresponding to lexer::lexer_error values. */ #define MERVE_ERROR_TODO 0 #define MERVE_ERROR_UNEXPECTED_PAREN 1 @@ -74,6 +88,25 @@ extern "C" { */ merve_analysis merve_parse_commonjs(const char* input, size_t length); +/** + * Parse CommonJS source code and optionally return error location. + * + * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always + * written: + * - On success: set to {0, 0, 0}. + * - On parse failure with known location: set to that location. + * - On parse failure without available location: set to {0, 0, 0}. + * + * @param input Pointer to the JavaScript source (need not be + * null-terminated). NULL is treated as an empty string. + * @param length Length of the input in bytes. + * @param out_err Optional output pointer for parse error location. 
+ * @return A handle to the parse result, or NULL on out-of-memory. + * Use merve_is_valid() to check if parsing succeeded. + */ +merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, + merve_error_loc* out_err); + /** * Check whether the parse result is valid (parsing succeeded). * diff --git a/rust/src/ffi.rs b/rust/src/ffi.rs index a966915..6b9c02e 100644 --- a/rust/src/ffi.rs +++ b/rust/src/ffi.rs @@ -25,8 +25,22 @@ pub struct merve_version_components { pub revision: c_int, } +#[cfg(feature = "error-location")] +#[repr(C)] +pub struct merve_error_loc { + pub line: u32, + pub column: u32, + pub offset: usize, +} + unsafe extern "C" { pub fn merve_parse_commonjs(input: *const c_char, length: usize) -> merve_analysis; + #[cfg(feature = "error-location")] + pub fn merve_parse_commonjs_ex( + input: *const c_char, + length: usize, + out_err: *mut merve_error_loc, + ) -> merve_analysis; pub fn merve_is_valid(result: merve_analysis) -> bool; pub fn merve_free(result: merve_analysis); pub fn merve_get_exports_count(result: merve_analysis) -> usize; diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 0997598..a2cff30 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -112,6 +112,74 @@ impl fmt::Display for LexerError { #[cfg(feature = "std")] impl std::error::Error for LexerError {} +/// 1-based error position with a 0-based byte offset. +#[cfg(feature = "error-location")] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ErrorLocation { + /// 1-based line number. + pub line: NonZeroU32, + /// 1-based column number (byte-oriented). + pub column: NonZeroU32, + /// 0-based UTF-8 byte offset from the start of input. + pub offset: usize, +} + +#[cfg(feature = "error-location")] +impl ErrorLocation { + #[inline] + fn from_ffi(loc: ffi::merve_error_loc) -> Option { + Some(Self { + line: NonZeroU32::new(loc.line)?, + column: NonZeroU32::new(loc.column)?, + offset: loc.offset, + }) + } +} + +/// Lexer error with optional source location. 
+#[cfg(feature = "error-location")] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LocatedLexerError { + /// Error kind reported by the lexer. + pub kind: LexerError, + /// Source location, if available. + pub location: Option, +} + +#[cfg(feature = "error-location")] +impl LocatedLexerError { + #[inline] + fn from_code_and_loc(code: i32, loc: ffi::merve_error_loc) -> Self { + let kind = if code >= 0 { + LexerError::from_code(code) + } else { + LexerError::Unknown(code) + }; + Self { + kind, + location: ErrorLocation::from_ffi(loc), + } + } +} + +#[cfg(feature = "error-location")] +impl fmt::Display for LocatedLexerError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(loc) = self.location { + write!( + f, + "{} at line {}, column {} (byte offset {})", + self.kind, loc.line, loc.column, loc.offset + ) + } else { + write!(f, "{}", self.kind) + } + } +} + +#[cfg(all(feature = "std", feature = "error-location"))] +impl std::error::Error for LocatedLexerError {} + /// A parsed CommonJS analysis result. /// /// The lifetime `'a` is tied to the source string passed to [`parse_commonjs`], @@ -320,6 +388,9 @@ impl ExactSizeIterator for ExportIter<'_, '_> {} /// Returns a [`LexerError`] if the input contains ESM syntax or other /// unsupported constructs. /// +/// Enable the `error-location` feature to use +/// [`parse_commonjs_with_location`] for location-aware errors. +/// /// # Examples /// /// ``` @@ -376,6 +447,53 @@ pub fn parse_commonjs(source: &str) -> Result, LexerError> { }) } +/// Parse CommonJS source and return location-aware errors. +/// +/// This API is available with the `error-location` feature. +/// +/// # Errors +/// +/// Returns [`LocatedLexerError`] on parse failure. Location data is optional +/// and depends on the underlying library build configuration. 
+#[cfg(feature = "error-location")] +pub fn parse_commonjs_with_location(source: &str) -> Result, LocatedLexerError> { + if source.is_empty() { + return Err(LocatedLexerError { + kind: LexerError::EmptySource, + location: Some(ErrorLocation { + line: NonZeroU32::new(1).expect("1 is non-zero"), + column: NonZeroU32::new(1).expect("1 is non-zero"), + offset: 0, + }), + }); + } + + let mut loc = ffi::merve_error_loc { + line: 0, + column: 0, + offset: 0, + }; + + let handle = + unsafe { ffi::merve_parse_commonjs_ex(source.as_ptr().cast(), source.len(), &mut loc) }; + if handle.is_null() { + let code = unsafe { ffi::merve_get_last_error() }; + return Err(LocatedLexerError::from_code_and_loc(code, loc)); + } + + if !unsafe { ffi::merve_is_valid(handle) } { + let code = unsafe { ffi::merve_get_last_error() }; + let err = LocatedLexerError::from_code_and_loc(code, loc); + unsafe { ffi::merve_free(handle) }; + return Err(err); + } + + Ok(Analysis { + handle, + _source: PhantomData, + }) +} + /// Get the merve library version string (e.g. `"1.0.1"`). 
#[must_use] pub fn version() -> &'static str { @@ -474,6 +592,36 @@ mod tests { assert_eq!(result.unwrap_err(), LexerError::EmptySource); } + #[cfg(feature = "error-location")] + #[test] + fn parse_with_location_reports_error_position() { + let source = "\n import 'x';"; + let result = parse_commonjs_with_location(source); + assert!(result.is_err()); + + let err = result.unwrap_err(); + assert_eq!(err.kind, LexerError::UnexpectedEsmImport); + let loc = err.location.expect("location should be present"); + assert_eq!(loc.line, NonZeroU32::new(2).unwrap()); + assert_eq!(loc.column, NonZeroU32::new(3).unwrap()); + assert_eq!(loc.offset, 3); + } + + #[cfg(feature = "error-location")] + #[test] + fn parse_with_location_empty_source() { + let result = parse_commonjs_with_location(""); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert_eq!(err.kind, LexerError::EmptySource); + let loc = err + .location + .expect("empty source location should be present"); + assert_eq!(loc.line, NonZeroU32::new(1).unwrap()); + assert_eq!(loc.column, NonZeroU32::new(1).unwrap()); + assert_eq!(loc.offset, 0); + } + #[test] fn out_of_bounds_returns_none() { let source = "exports.x = 1;"; @@ -558,6 +706,23 @@ mod tests { assert!(s.contains("99"), "got: {s}"); } + #[cfg(all(feature = "std", feature = "error-location"))] + #[test] + fn located_error_display_includes_location() { + let err = LocatedLexerError { + kind: LexerError::UnexpectedEsmImport, + location: Some(ErrorLocation { + line: NonZeroU32::new(2).unwrap(), + column: NonZeroU32::new(4).unwrap(), + offset: 9, + }), + }; + let s = format!("{err}"); + assert!(s.contains("line 2"), "got: {s}"); + assert!(s.contains("column 4"), "got: {s}"); + assert!(s.contains("offset 9"), "got: {s}"); + } + #[test] fn error_from_code_roundtrip() { for code in 0..=12 { @@ -574,6 +739,13 @@ mod tests { assert_error::(); } + #[cfg(all(feature = "std", feature = "error-location"))] + #[test] + fn located_error_is_std_error() { + fn 
assert_error() {} + assert_error::(); + } + #[test] fn bracket_notation_exports() { let source = r#"exports["hello-world"] = 1;"#; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4156149..83b9843 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -60,3 +60,7 @@ if(MERVE_USE_SIMDUTF) target_link_libraries(merve PRIVATE simdutf) target_compile_definitions(merve PRIVATE MERVE_USE_SIMDUTF=1) endif() + +if(MERVE_ENABLE_ERROR_LOCATION) + target_compile_definitions(merve PUBLIC MERVE_ENABLE_ERROR_LOCATION=1) +endif() diff --git a/src/merve_c.cpp b/src/merve_c.cpp index 1909fa3..dab818a 100644 --- a/src/merve_c.cpp +++ b/src/merve_c.cpp @@ -14,9 +14,31 @@ static merve_string merve_string_create(const char* data, size_t length) { return out; } +static void merve_error_loc_clear(merve_error_loc* out_err) { + if (!out_err) return; + out_err->line = 0; + out_err->column = 0; + out_err->offset = 0; +} + +static void merve_error_loc_set(merve_error_loc* out_err, + const lexer::error_location& loc) { + if (!out_err) return; + out_err->line = loc.line; + out_err->column = loc.column; + out_err->offset = loc.offset; +} + extern "C" { merve_analysis merve_parse_commonjs(const char* input, size_t length) { + return merve_parse_commonjs_ex(input, length, nullptr); +} + +merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, + merve_error_loc* out_err) { + merve_error_loc_clear(out_err); + merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); if (!impl) return nullptr; if (input != nullptr) { @@ -24,6 +46,15 @@ merve_analysis merve_parse_commonjs(const char* input, size_t length) { } else { impl->result = lexer::parse_commonjs(std::string_view("", 0)); } + + if (!impl->result.has_value() && out_err) { + const std::optional& err_loc = + lexer::get_last_error_location(); + if (err_loc.has_value()) { + merve_error_loc_set(out_err, err_loc.value()); + } + } + return static_cast(impl); } diff --git a/src/parser.cpp b/src/parser.cpp index 
c977db0..d42ca66 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -312,6 +312,43 @@ struct StarExportBinding { // Thread-local state for error tracking (safe for concurrent parse calls). thread_local std::optional last_error; +thread_local std::optional last_error_location; + +#ifdef MERVE_ENABLE_ERROR_LOCATION +static error_location makeErrorLocation(const char* source, const char* end, const char* at) { + const char* target = at; + if (target < source) target = source; + if (target > end) target = end; + + uint32_t line = 1; + uint32_t column = 1; + const char* cur = source; + + while (cur < target) { + const char ch = *cur++; + if (ch == '\n') { + line++; + column = 1; + continue; + } + if (ch == '\r') { + line++; + column = 1; + if (cur < target && *cur == '\n') { + cur++; + } + continue; + } + column++; + } + + error_location loc{}; + loc.line = line; + loc.column = column; + loc.offset = static_cast(target - source); + return loc; +} +#endif // Lexer state class class CJSLexer { @@ -332,6 +369,7 @@ class CJSLexer { std::array templateStack_; std::array openTokenPosStack_; + std::array openTokenTypeStack_; std::array openClassPosStack; std::array starExportStack_; StarExportBinding* starExportStack; @@ -483,9 +521,15 @@ class CJSLexer { } // Parsing utilities - void syntaxError(lexer_error code) { + void syntaxError(lexer_error code, const char* at = nullptr) { if (!last_error) { last_error = code; +#ifdef MERVE_ENABLE_ERROR_LOCATION + const char* error_pos = at ? 
at : pos; + last_error_location = makeErrorLocation(source, end, error_pos); +#else + (void)at; +#endif } pos = end + 1; } @@ -1488,6 +1532,7 @@ class CJSLexer { char ch = commentWhitespace(); switch (ch) { case '(': + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = startPos; return; case '.': @@ -1501,7 +1546,7 @@ class CJSLexer { // It's something like import.metaData, not import.meta return; } - syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT_META); + syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT_META, startPos); } return; default: @@ -1516,17 +1561,18 @@ class CJSLexer { pos--; return; } - syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT); + syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT, startPos); } } void throwIfExportStatement() { + const char* startPos = pos; pos += 6; const char* curPos = pos; char ch = commentWhitespace(); if (pos == curPos && !isPunctuator(ch)) return; - syntaxError(lexer_error::UNEXPECTED_ESM_EXPORT); + syntaxError(lexer_error::UNEXPECTED_ESM_EXPORT, startPos); } public: @@ -1535,7 +1581,7 @@ class CJSLexer { templateStackDepth(0), openTokenDepth(0), templateDepth(0), line(1), lastSlashWasDivision(false), nextBraceIsClass(false), - templateStack_{}, openTokenPosStack_{}, openClassPosStack{}, + templateStack_{}, openTokenPosStack_{}, openTokenTypeStack_{}, openClassPosStack{}, starExportStack_{}, starExportStack(nullptr), STAR_EXPORT_STACK_END(nullptr), exports(out_exports), re_exports(out_re_exports) {} @@ -1600,6 +1646,7 @@ class CJSLexer { pos += 23; if (*pos == '(') { pos++; + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; if (tryParseRequire(RequireType::Import) && keywordStart(startPos)) tryBacktrackAddStarExportBinding(startPos - 1); @@ -1609,6 +1656,7 @@ class CJSLexer { if (pos + 4 < end && matchesAt(pos, end, "Star")) pos += 4; if (*pos == '(') { + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; if 
(*(pos + 1) == 'r') { pos++; @@ -1643,6 +1691,7 @@ class CJSLexer { tryParseObjectDefineOrKeys(openTokenDepth == 0); break; case '(': + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; break; case ')': @@ -1655,6 +1704,7 @@ class CJSLexer { case '{': openClassPosStack[openTokenDepth] = nextBraceIsClass; nextBraceIsClass = false; + openTokenTypeStack_[openTokenDepth] = '{'; openTokenPosStack_[openTokenDepth++] = lastTokenPos; break; case '}': @@ -1717,6 +1767,19 @@ class CJSLexer { lastTokenPos = pos; } + if (!last_error) { + if (templateDepth != std::numeric_limits::max()) { + syntaxError(lexer_error::UNTERMINATED_TEMPLATE_STRING, end); + } else if (openTokenDepth != 0) { + const char open_ch = openTokenTypeStack_[openTokenDepth - 1]; + if (open_ch == '{') { + syntaxError(lexer_error::UNTERMINATED_BRACE, end); + } else { + syntaxError(lexer_error::UNTERMINATED_PAREN, end); + } + } + } + if (templateDepth != std::numeric_limits::max() || openTokenDepth || last_error) { return false; } @@ -1727,6 +1790,7 @@ class CJSLexer { std::optional parse_commonjs(std::string_view file_contents) { last_error.reset(); + last_error_location.reset(); lexer_analysis result; CJSLexer lexer(result.exports, result.re_exports); @@ -1742,4 +1806,8 @@ const std::optional& get_last_error() { return last_error; } +const std::optional& get_last_error_location() { + return last_error_location; +} + } // namespace lexer diff --git a/tests/c_api_compile_test.c b/tests/c_api_compile_test.c index 77caaa1..b6f53dd 100644 --- a/tests/c_api_compile_test.c +++ b/tests/c_api_compile_test.c @@ -23,6 +23,16 @@ static void check_types(void) { merve_analysis a = (merve_analysis)0; (void)a; + merve_error_loc loc; + loc.line = 0; + loc.column = 0; + loc.offset = 0; + (void)loc; + + merve_analysis (*parse_ex)(const char*, size_t, merve_error_loc*) = + &merve_parse_commonjs_ex; + (void)parse_ex; + /* Verify the error constants are valid integer constant 
expressions. */ int errors[] = { MERVE_ERROR_TODO, diff --git a/tests/c_api_tests.cpp b/tests/c_api_tests.cpp index cdcd0e4..6bf7e20 100644 --- a/tests/c_api_tests.cpp +++ b/tests/c_api_tests.cpp @@ -11,6 +11,10 @@ static bool merve_string_eq(merve_string s, const char* expected) { return std::memcmp(s.data, expected, expected_len) == 0; } +static bool merve_error_loc_is_zero(merve_error_loc loc) { + return loc.line == 0 && loc.column == 0 && loc.offset == 0; +} + TEST(c_api_tests, version_string) { const char* version = merve_get_version(); ASSERT_NE(version, nullptr); @@ -106,6 +110,47 @@ TEST(c_api_tests, esm_import_error) { merve_free(result); } +TEST(c_api_tests, parse_commonjs_ex_success_clears_error_location) { + const char* source = "exports.foo = 1;"; + merve_error_loc loc{9, 9, 9}; + merve_analysis result = + merve_parse_commonjs_ex(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_TRUE(merve_is_valid(result)); + ASSERT_TRUE(merve_error_loc_is_zero(loc)); + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_ex_error_location) { + const char* source = "\n import 'x';"; + merve_error_loc loc{123, 456, 789}; + merve_analysis result = + merve_parse_commonjs_ex(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); + +#if defined(MERVE_ENABLE_ERROR_LOCATION) + ASSERT_EQ(loc.line, 2u); + ASSERT_EQ(loc.column, 3u); + ASSERT_EQ(loc.offset, 3u); +#else + ASSERT_TRUE(merve_error_loc_is_zero(loc)); +#endif + + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_ex_accepts_null_out_err) { + const char* source = "import 'x';"; + merve_analysis result = + merve_parse_commonjs_ex(source, std::strlen(source), NULL); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); + merve_free(result); +} + TEST(c_api_tests, 
esm_export_error) { const char* source = "export { x };"; merve_analysis result = merve_parse_commonjs(source, std::strlen(source)); diff --git a/tests/real_world_tests.cpp b/tests/real_world_tests.cpp index 08e54f4..cad4c9b 100644 --- a/tests/real_world_tests.cpp +++ b/tests/real_world_tests.cpp @@ -980,6 +980,41 @@ TEST(real_world_tests, esm_syntax_error_import_meta) { ASSERT_EQ(err, lexer::lexer_error::UNEXPECTED_ESM_IMPORT_META); } +TEST(real_world_tests, eof_unterminated_brace_error) { + auto result = lexer::parse_commonjs("(function test() {"); + ASSERT_FALSE(result.has_value()); + auto err = lexer::get_last_error(); + ASSERT_TRUE(err.has_value()); + ASSERT_EQ(err, lexer::lexer_error::UNTERMINATED_BRACE); +} + +TEST(real_world_tests, eof_unterminated_paren_error) { + auto result = lexer::parse_commonjs("(a + b"); + ASSERT_FALSE(result.has_value()); + auto err = lexer::get_last_error(); + ASSERT_TRUE(err.has_value()); + ASSERT_EQ(err, lexer::lexer_error::UNTERMINATED_PAREN); +} + +TEST(real_world_tests, error_location_state_resets_after_success) { + auto failed = lexer::parse_commonjs("\n import 'x';"); + ASSERT_FALSE(failed.has_value()); + + auto loc_after_error = lexer::get_last_error_location(); +#if defined(MERVE_ENABLE_ERROR_LOCATION) + ASSERT_TRUE(loc_after_error.has_value()); + ASSERT_EQ(loc_after_error->line, 2u); + ASSERT_EQ(loc_after_error->column, 3u); + ASSERT_EQ(loc_after_error->offset, 3u); +#else + ASSERT_FALSE(loc_after_error.has_value()); +#endif + + auto ok = lexer::parse_commonjs("exports.ok = 1;"); + ASSERT_TRUE(ok.has_value()); + ASSERT_FALSE(lexer::get_last_error_location().has_value()); +} + TEST(real_world_tests, unicode_escape_sequences) { // Test various unicode escape sequences in exports auto result = lexer::parse_commonjs("\ From 77e42d67003182acb19e0b3cd1457f94f2098343 Mon Sep 17 00:00:00 2001 From: cijiugechu Date: Fri, 27 Feb 2026 15:47:13 +0800 Subject: [PATCH 2/6] test: extend error-location coverage for CRLF, import.meta, and 
EOF --- rust/src/lib.rs | 41 +++++++++++++++++++ tests/c_api_tests.cpp | 80 ++++++++++++++++++++++++++++++++++++++ tests/real_world_tests.cpp | 49 +++++++++++++++++++++++ 3 files changed, 170 insertions(+) diff --git a/rust/src/lib.rs b/rust/src/lib.rs index a2cff30..80c70c3 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -622,6 +622,47 @@ mod tests { assert_eq!(loc.offset, 0); } + #[cfg(feature = "error-location")] + #[test] + fn parse_with_location_crlf_position() { + let source = "\r\n import 'x';"; + let result = parse_commonjs_with_location(source); + assert!(result.is_err()); + + let err = result.unwrap_err(); + assert_eq!(err.kind, LexerError::UnexpectedEsmImport); + let loc = err.location.expect("location should be present"); + assert_eq!(loc.line, NonZeroU32::new(2).unwrap()); + assert_eq!(loc.column, NonZeroU32::new(3).unwrap()); + assert_eq!(loc.offset, 4); + } + + #[cfg(feature = "error-location")] + #[test] + fn parse_with_location_import_meta_and_eof() { + let import_meta = parse_commonjs_with_location("\n import.meta.url"); + assert!(import_meta.is_err()); + let import_meta_err = import_meta.unwrap_err(); + assert_eq!(import_meta_err.kind, LexerError::UnexpectedEsmImportMeta); + let import_meta_loc = import_meta_err + .location + .expect("import.meta location should be present"); + assert_eq!(import_meta_loc.line, NonZeroU32::new(2).unwrap()); + assert_eq!(import_meta_loc.column, NonZeroU32::new(3).unwrap()); + assert_eq!(import_meta_loc.offset, 3); + + let eof = parse_commonjs_with_location("(a + b"); + assert!(eof.is_err()); + let eof_err = eof.unwrap_err(); + assert_eq!(eof_err.kind, LexerError::UnterminatedParen); + let eof_loc = eof_err + .location + .expect("unterminated paren location should be present"); + assert_eq!(eof_loc.line, NonZeroU32::new(1).unwrap()); + assert_eq!(eof_loc.column, NonZeroU32::new(7).unwrap()); + assert_eq!(eof_loc.offset, 6); + } + #[test] fn out_of_bounds_returns_none() { let source = "exports.x = 1;"; diff 
--git a/tests/c_api_tests.cpp b/tests/c_api_tests.cpp index 6bf7e20..e287396 100644 --- a/tests/c_api_tests.cpp +++ b/tests/c_api_tests.cpp @@ -141,6 +141,86 @@ TEST(c_api_tests, parse_commonjs_ex_error_location) { merve_free(result); } +TEST(c_api_tests, parse_commonjs_ex_error_location_crlf) { + const char* source = "\r\n import 'x';"; + merve_error_loc loc{123, 456, 789}; + merve_analysis result = + merve_parse_commonjs_ex(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); + +#if defined(MERVE_ENABLE_ERROR_LOCATION) + ASSERT_EQ(loc.line, 2u); + ASSERT_EQ(loc.column, 3u); + ASSERT_EQ(loc.offset, 4u); +#else + ASSERT_TRUE(merve_error_loc_is_zero(loc)); +#endif + + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_ex_export_error_location) { + const char* source = "\n export { x };"; + merve_error_loc loc{123, 456, 789}; + merve_analysis result = + merve_parse_commonjs_ex(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_EXPORT); + +#if defined(MERVE_ENABLE_ERROR_LOCATION) + ASSERT_EQ(loc.line, 2u); + ASSERT_EQ(loc.column, 3u); + ASSERT_EQ(loc.offset, 3u); +#else + ASSERT_TRUE(merve_error_loc_is_zero(loc)); +#endif + + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_ex_import_meta_error_location) { + const char* source = "\n import.meta.url"; + merve_error_loc loc{123, 456, 789}; + merve_analysis result = + merve_parse_commonjs_ex(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META); + +#if defined(MERVE_ENABLE_ERROR_LOCATION) + ASSERT_EQ(loc.line, 2u); + ASSERT_EQ(loc.column, 3u); + ASSERT_EQ(loc.offset, 3u); +#else + ASSERT_TRUE(merve_error_loc_is_zero(loc)); +#endif + + 
merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_ex_eof_unterminated_paren_location) { + const char* source = "(a + b"; + merve_error_loc loc{123, 456, 789}; + merve_analysis result = + merve_parse_commonjs_ex(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNTERMINATED_PAREN); + +#if defined(MERVE_ENABLE_ERROR_LOCATION) + ASSERT_EQ(loc.line, 1u); + ASSERT_EQ(loc.column, 7u); + ASSERT_EQ(loc.offset, 6u); +#else + ASSERT_TRUE(merve_error_loc_is_zero(loc)); +#endif + + merve_free(result); +} + TEST(c_api_tests, parse_commonjs_ex_accepts_null_out_err) { const char* source = "import 'x';"; merve_analysis result = diff --git a/tests/real_world_tests.cpp b/tests/real_world_tests.cpp index cad4c9b..47513e6 100644 --- a/tests/real_world_tests.cpp +++ b/tests/real_world_tests.cpp @@ -1015,6 +1015,55 @@ TEST(real_world_tests, error_location_state_resets_after_success) { ASSERT_FALSE(lexer::get_last_error_location().has_value()); } +TEST(real_world_tests, error_location_crlf_line_counting) { + auto failed = lexer::parse_commonjs("\r\n import 'x';"); + ASSERT_FALSE(failed.has_value()); + auto err = lexer::get_last_error(); + ASSERT_TRUE(err.has_value()); + ASSERT_EQ(err, lexer::lexer_error::UNEXPECTED_ESM_IMPORT); + + auto loc = lexer::get_last_error_location(); +#if defined(MERVE_ENABLE_ERROR_LOCATION) + ASSERT_TRUE(loc.has_value()); + ASSERT_EQ(loc->line, 2u); + ASSERT_EQ(loc->column, 3u); + ASSERT_EQ(loc->offset, 4u); +#else + ASSERT_FALSE(loc.has_value()); +#endif +} + +TEST(real_world_tests, error_location_import_meta_and_eof) { + auto import_meta = lexer::parse_commonjs("\n import.meta.url"); + ASSERT_FALSE(import_meta.has_value()); + ASSERT_EQ(lexer::get_last_error(), + lexer::lexer_error::UNEXPECTED_ESM_IMPORT_META); + + auto import_meta_loc = lexer::get_last_error_location(); +#if defined(MERVE_ENABLE_ERROR_LOCATION) + 
ASSERT_TRUE(import_meta_loc.has_value()); + ASSERT_EQ(import_meta_loc->line, 2u); + ASSERT_EQ(import_meta_loc->column, 3u); + ASSERT_EQ(import_meta_loc->offset, 3u); +#else + ASSERT_FALSE(import_meta_loc.has_value()); +#endif + + auto eof_unterminated = lexer::parse_commonjs("(a + b"); + ASSERT_FALSE(eof_unterminated.has_value()); + ASSERT_EQ(lexer::get_last_error(), lexer::lexer_error::UNTERMINATED_PAREN); + + auto eof_loc = lexer::get_last_error_location(); +#if defined(MERVE_ENABLE_ERROR_LOCATION) + ASSERT_TRUE(eof_loc.has_value()); + ASSERT_EQ(eof_loc->line, 1u); + ASSERT_EQ(eof_loc->column, 7u); + ASSERT_EQ(eof_loc->offset, 6u); +#else + ASSERT_FALSE(eof_loc.has_value()); +#endif +} + TEST(real_world_tests, unicode_escape_sequences) { // Test various unicode escape sequences in exports auto result = lexer::parse_commonjs("\ From fbb63b4147128717bc4d591d5fceee870920426b Mon Sep 17 00:00:00 2001 From: cijiugechu Date: Sat, 28 Feb 2026 09:59:11 +0800 Subject: [PATCH 3/6] refactor: remove offset from error locations Line/column are sufficient for error reporting; drop byte offsets across C and Rust APIs. --- README.md | 10 ++++------ include/merve/parser.h | 3 +-- include/merve_c.h | 9 ++++----- rust/README.md | 4 ++-- rust/deps/merve.cpp | 12 ++++-------- rust/deps/merve.h | 3 +-- rust/deps/merve_c.h | 9 ++++----- rust/src/ffi.rs | 1 - rust/src/lib.rs | 24 +++++------------------- src/merve_c.cpp | 2 -- src/parser.cpp | 1 - tests/c_api_compile_test.c | 1 - tests/c_api_tests.cpp | 19 +++++++------------ tests/real_world_tests.cpp | 4 ---- 14 files changed, 32 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 524c380..b5ece57 100644 --- a/README.md +++ b/README.md @@ -146,8 +146,7 @@ is enabled when built with `MERVE_ENABLE_ERROR_LOCATION`. 
```cpp struct error_location { uint32_t line; // 1-based - uint32_t column; // 1-based - size_t offset; // 0-based byte offset + uint32_t column; // 1-based (byte-oriented) }; ``` @@ -165,7 +164,7 @@ merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or a int main(void) { const char* source = "exports.foo = 1;\nexports.bar = 2;\n"; - merve_error_loc err_loc = {0, 0, 0}; + merve_error_loc err_loc = {0, 0}; merve_analysis result = merve_parse_commonjs_ex( source, strlen(source), &err_loc); @@ -180,8 +179,7 @@ int main(void) { } else { printf("Parse error: %d\n", merve_get_last_error()); if (err_loc.line != 0) { - printf(" at line %u, column %u (byte offset %zu)\n", - err_loc.line, err_loc.column, err_loc.offset); + printf(" at line %u, column %u\n", err_loc.line, err_loc.column); } } @@ -206,7 +204,7 @@ Found 2 exports: | `merve_string` | Non-owning string reference (`data` + `length`). Not null-terminated. | | `merve_analysis` | Opaque handle to a parse result. Must be freed with `merve_free()`. | | `merve_version_components` | Struct with `major`, `minor`, `revision` fields. | -| `merve_error_loc` | Error location (`line`, `column`, `offset`). `{0,0,0}` means unavailable. | +| `merve_error_loc` | Error location (`line`, `column`). `{0,0}` means unavailable. | #### Functions diff --git a/include/merve/parser.h b/include/merve/parser.h index 2def0be..270fd0c 100644 --- a/include/merve/parser.h +++ b/include/merve/parser.h @@ -42,12 +42,11 @@ enum lexer_error { * @brief Source location information for a parse error. * * - line and column are 1-based. - * - offset is 0-based and measured in bytes from the start of input. + * - column is byte-oriented. */ struct error_location { uint32_t line; uint32_t column; - size_t offset; }; /** diff --git a/include/merve_c.h b/include/merve_c.h index c85aeea..f3fa42e 100644 --- a/include/merve_c.h +++ b/include/merve_c.h @@ -43,14 +43,13 @@ typedef struct { * @brief Source location for a parse error. 
* * - line and column are 1-based. - * - offset is 0-based and measured in bytes from the start of input. + * - column is byte-oriented. * - * A zeroed location (`{0, 0, 0}`) means the location is unavailable. + * A zeroed location (`{0, 0}`) means the location is unavailable. */ typedef struct { uint32_t line; uint32_t column; - size_t offset; } merve_error_loc; /* Error codes corresponding to lexer::lexer_error values. */ @@ -93,9 +92,9 @@ merve_analysis merve_parse_commonjs(const char* input, size_t length); * * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always * written: - * - On success: set to {0, 0, 0}. + * - On success: set to {0, 0}. * - On parse failure with known location: set to that location. - * - On parse failure without available location: set to {0, 0, 0}. + * - On parse failure without available location: set to {0, 0}. * * @param input Pointer to the JavaScript source (need not be * null-terminated). NULL is treated as an empty string. diff --git a/rust/README.md b/rust/README.md index 1038219..134cc51 100644 --- a/rust/README.md +++ b/rust/README.md @@ -74,7 +74,7 @@ pub fn parse_commonjs_with_location( ``` Like `parse_commonjs`, but returns a `LocatedLexerError` that includes -`kind: LexerError` plus optional location (`line`, `column`, `offset`). +`kind: LexerError` plus optional location (`line`, `column`). ### `Analysis<'a>` @@ -127,7 +127,7 @@ pub struct LocatedLexerError { } ``` -`ErrorLocation` uses 1-based `line`/`column` and 0-based byte `offset`. +`ErrorLocation` uses 1-based `line`/`column` (byte-oriented column). 
### Versioning helpers diff --git a/rust/deps/merve.cpp b/rust/deps/merve.cpp index c4ed4db..de1b3dd 100644 --- a/rust/deps/merve.cpp +++ b/rust/deps/merve.cpp @@ -347,7 +347,6 @@ static error_location makeErrorLocation(const char* source, const char* end, con error_location loc{}; loc.line = line; loc.column = column; - loc.offset = static_cast(target - source); return loc; } #endif @@ -1868,14 +1867,13 @@ typedef struct { * @brief Source location for a parse error. * * - line and column are 1-based. - * - offset is 0-based and measured in bytes from the start of input. + * - column is byte-oriented. * - * A zeroed location (`{0, 0, 0}`) means the location is unavailable. + * A zeroed location (`{0, 0}`) means the location is unavailable. */ typedef struct { uint32_t line; uint32_t column; - size_t offset; } merve_error_loc; /* Error codes corresponding to lexer::lexer_error values. */ @@ -1918,9 +1916,9 @@ merve_analysis merve_parse_commonjs(const char* input, size_t length); * * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always * written: - * - On success: set to {0, 0, 0}. + * - On success: set to {0, 0}. * - On parse failure with known location: set to that location. - * - On parse failure without available location: set to {0, 0, 0}. + * - On parse failure without available location: set to {0, 0}. * * @param input Pointer to the JavaScript source (need not be * null-terminated). NULL is treated as an empty string. 
@@ -2046,7 +2044,6 @@ static void merve_error_loc_clear(merve_error_loc* out_err) { if (!out_err) return; out_err->line = 0; out_err->column = 0; - out_err->offset = 0; } static void merve_error_loc_set(merve_error_loc* out_err, @@ -2054,7 +2051,6 @@ static void merve_error_loc_set(merve_error_loc* out_err, if (!out_err) return; out_err->line = loc.line; out_err->column = loc.column; - out_err->offset = loc.offset; } extern "C" { diff --git a/rust/deps/merve.h b/rust/deps/merve.h index 2730841..bf9b40e 100644 --- a/rust/deps/merve.h +++ b/rust/deps/merve.h @@ -68,12 +68,11 @@ enum lexer_error { * @brief Source location information for a parse error. * * - line and column are 1-based. - * - offset is 0-based and measured in bytes from the start of input. + * - column is byte-oriented. */ struct error_location { uint32_t line; uint32_t column; - size_t offset; }; /** diff --git a/rust/deps/merve_c.h b/rust/deps/merve_c.h index c85aeea..f3fa42e 100644 --- a/rust/deps/merve_c.h +++ b/rust/deps/merve_c.h @@ -43,14 +43,13 @@ typedef struct { * @brief Source location for a parse error. * * - line and column are 1-based. - * - offset is 0-based and measured in bytes from the start of input. + * - column is byte-oriented. * - * A zeroed location (`{0, 0, 0}`) means the location is unavailable. + * A zeroed location (`{0, 0}`) means the location is unavailable. */ typedef struct { uint32_t line; uint32_t column; - size_t offset; } merve_error_loc; /* Error codes corresponding to lexer::lexer_error values. */ @@ -93,9 +92,9 @@ merve_analysis merve_parse_commonjs(const char* input, size_t length); * * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always * written: - * - On success: set to {0, 0, 0}. + * - On success: set to {0, 0}. * - On parse failure with known location: set to that location. - * - On parse failure without available location: set to {0, 0, 0}. + * - On parse failure without available location: set to {0, 0}. 
* * @param input Pointer to the JavaScript source (need not be * null-terminated). NULL is treated as an empty string. diff --git a/rust/src/ffi.rs b/rust/src/ffi.rs index 6b9c02e..37d9b96 100644 --- a/rust/src/ffi.rs +++ b/rust/src/ffi.rs @@ -30,7 +30,6 @@ pub struct merve_version_components { pub struct merve_error_loc { pub line: u32, pub column: u32, - pub offset: usize, } unsafe extern "C" { diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 80c70c3..e11e23e 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -112,7 +112,7 @@ impl fmt::Display for LexerError { #[cfg(feature = "std")] impl std::error::Error for LexerError {} -/// 1-based error position with a 0-based byte offset. +/// 1-based error position. #[cfg(feature = "error-location")] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct ErrorLocation { @@ -120,8 +120,6 @@ pub struct ErrorLocation { pub line: NonZeroU32, /// 1-based column number (byte-oriented). pub column: NonZeroU32, - /// 0-based UTF-8 byte offset from the start of input. 
- pub offset: usize, } #[cfg(feature = "error-location")] @@ -131,7 +129,6 @@ impl ErrorLocation { Some(Self { line: NonZeroU32::new(loc.line)?, column: NonZeroU32::new(loc.column)?, - offset: loc.offset, }) } } @@ -168,8 +165,8 @@ impl fmt::Display for LocatedLexerError { if let Some(loc) = self.location { write!( f, - "{} at line {}, column {} (byte offset {})", - self.kind, loc.line, loc.column, loc.offset + "{} at line {}, column {}", + self.kind, loc.line, loc.column ) } else { write!(f, "{}", self.kind) @@ -463,16 +460,11 @@ pub fn parse_commonjs_with_location(source: &str) -> Result, Locate location: Some(ErrorLocation { line: NonZeroU32::new(1).expect("1 is non-zero"), column: NonZeroU32::new(1).expect("1 is non-zero"), - offset: 0, }), }); } - let mut loc = ffi::merve_error_loc { - line: 0, - column: 0, - offset: 0, - }; + let mut loc = ffi::merve_error_loc { line: 0, column: 0 }; let handle = unsafe { ffi::merve_parse_commonjs_ex(source.as_ptr().cast(), source.len(), &mut loc) }; @@ -604,7 +596,6 @@ mod tests { let loc = err.location.expect("location should be present"); assert_eq!(loc.line, NonZeroU32::new(2).unwrap()); assert_eq!(loc.column, NonZeroU32::new(3).unwrap()); - assert_eq!(loc.offset, 3); } #[cfg(feature = "error-location")] @@ -619,7 +610,6 @@ mod tests { .expect("empty source location should be present"); assert_eq!(loc.line, NonZeroU32::new(1).unwrap()); assert_eq!(loc.column, NonZeroU32::new(1).unwrap()); - assert_eq!(loc.offset, 0); } #[cfg(feature = "error-location")] @@ -634,7 +624,6 @@ mod tests { let loc = err.location.expect("location should be present"); assert_eq!(loc.line, NonZeroU32::new(2).unwrap()); assert_eq!(loc.column, NonZeroU32::new(3).unwrap()); - assert_eq!(loc.offset, 4); } #[cfg(feature = "error-location")] @@ -649,7 +638,6 @@ mod tests { .expect("import.meta location should be present"); assert_eq!(import_meta_loc.line, NonZeroU32::new(2).unwrap()); assert_eq!(import_meta_loc.column, NonZeroU32::new(3).unwrap()); - 
assert_eq!(import_meta_loc.offset, 3); let eof = parse_commonjs_with_location("(a + b"); assert!(eof.is_err()); @@ -660,7 +648,6 @@ mod tests { .expect("unterminated paren location should be present"); assert_eq!(eof_loc.line, NonZeroU32::new(1).unwrap()); assert_eq!(eof_loc.column, NonZeroU32::new(7).unwrap()); - assert_eq!(eof_loc.offset, 6); } #[test] @@ -755,13 +742,12 @@ mod tests { location: Some(ErrorLocation { line: NonZeroU32::new(2).unwrap(), column: NonZeroU32::new(4).unwrap(), - offset: 9, }), }; let s = format!("{err}"); assert!(s.contains("line 2"), "got: {s}"); assert!(s.contains("column 4"), "got: {s}"); - assert!(s.contains("offset 9"), "got: {s}"); + assert!(!s.contains("offset"), "got: {s}"); } #[test] diff --git a/src/merve_c.cpp b/src/merve_c.cpp index dab818a..420462e 100644 --- a/src/merve_c.cpp +++ b/src/merve_c.cpp @@ -18,7 +18,6 @@ static void merve_error_loc_clear(merve_error_loc* out_err) { if (!out_err) return; out_err->line = 0; out_err->column = 0; - out_err->offset = 0; } static void merve_error_loc_set(merve_error_loc* out_err, @@ -26,7 +25,6 @@ static void merve_error_loc_set(merve_error_loc* out_err, if (!out_err) return; out_err->line = loc.line; out_err->column = loc.column; - out_err->offset = loc.offset; } extern "C" { diff --git a/src/parser.cpp b/src/parser.cpp index d42ca66..1594e46 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -345,7 +345,6 @@ static error_location makeErrorLocation(const char* source, const char* end, con error_location loc{}; loc.line = line; loc.column = column; - loc.offset = static_cast(target - source); return loc; } #endif diff --git a/tests/c_api_compile_test.c b/tests/c_api_compile_test.c index b6f53dd..ca98843 100644 --- a/tests/c_api_compile_test.c +++ b/tests/c_api_compile_test.c @@ -26,7 +26,6 @@ static void check_types(void) { merve_error_loc loc; loc.line = 0; loc.column = 0; - loc.offset = 0; (void)loc; merve_analysis (*parse_ex)(const char*, size_t, merve_error_loc*) = diff --git 
a/tests/c_api_tests.cpp b/tests/c_api_tests.cpp index e287396..616c775 100644 --- a/tests/c_api_tests.cpp +++ b/tests/c_api_tests.cpp @@ -12,7 +12,7 @@ static bool merve_string_eq(merve_string s, const char* expected) { } static bool merve_error_loc_is_zero(merve_error_loc loc) { - return loc.line == 0 && loc.column == 0 && loc.offset == 0; + return loc.line == 0 && loc.column == 0; } TEST(c_api_tests, version_string) { @@ -112,7 +112,7 @@ TEST(c_api_tests, esm_import_error) { TEST(c_api_tests, parse_commonjs_ex_success_clears_error_location) { const char* source = "exports.foo = 1;"; - merve_error_loc loc{9, 9, 9}; + merve_error_loc loc{9, 9}; merve_analysis result = merve_parse_commonjs_ex(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); @@ -123,7 +123,7 @@ TEST(c_api_tests, parse_commonjs_ex_success_clears_error_location) { TEST(c_api_tests, parse_commonjs_ex_error_location) { const char* source = "\n import 'x';"; - merve_error_loc loc{123, 456, 789}; + merve_error_loc loc{123, 456}; merve_analysis result = merve_parse_commonjs_ex(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); @@ -133,7 +133,6 @@ TEST(c_api_tests, parse_commonjs_ex_error_location) { #if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 2u); ASSERT_EQ(loc.column, 3u); - ASSERT_EQ(loc.offset, 3u); #else ASSERT_TRUE(merve_error_loc_is_zero(loc)); #endif @@ -143,7 +142,7 @@ TEST(c_api_tests, parse_commonjs_ex_error_location) { TEST(c_api_tests, parse_commonjs_ex_error_location_crlf) { const char* source = "\r\n import 'x';"; - merve_error_loc loc{123, 456, 789}; + merve_error_loc loc{123, 456}; merve_analysis result = merve_parse_commonjs_ex(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); @@ -153,7 +152,6 @@ TEST(c_api_tests, parse_commonjs_ex_error_location_crlf) { #if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 2u); ASSERT_EQ(loc.column, 3u); - ASSERT_EQ(loc.offset, 4u); #else ASSERT_TRUE(merve_error_loc_is_zero(loc)); #endif @@ 
-163,7 +161,7 @@ TEST(c_api_tests, parse_commonjs_ex_error_location_crlf) { TEST(c_api_tests, parse_commonjs_ex_export_error_location) { const char* source = "\n export { x };"; - merve_error_loc loc{123, 456, 789}; + merve_error_loc loc{123, 456}; merve_analysis result = merve_parse_commonjs_ex(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); @@ -173,7 +171,6 @@ TEST(c_api_tests, parse_commonjs_ex_export_error_location) { #if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 2u); ASSERT_EQ(loc.column, 3u); - ASSERT_EQ(loc.offset, 3u); #else ASSERT_TRUE(merve_error_loc_is_zero(loc)); #endif @@ -183,7 +180,7 @@ TEST(c_api_tests, parse_commonjs_ex_export_error_location) { TEST(c_api_tests, parse_commonjs_ex_import_meta_error_location) { const char* source = "\n import.meta.url"; - merve_error_loc loc{123, 456, 789}; + merve_error_loc loc{123, 456}; merve_analysis result = merve_parse_commonjs_ex(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); @@ -193,7 +190,6 @@ TEST(c_api_tests, parse_commonjs_ex_import_meta_error_location) { #if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 2u); ASSERT_EQ(loc.column, 3u); - ASSERT_EQ(loc.offset, 3u); #else ASSERT_TRUE(merve_error_loc_is_zero(loc)); #endif @@ -203,7 +199,7 @@ TEST(c_api_tests, parse_commonjs_ex_import_meta_error_location) { TEST(c_api_tests, parse_commonjs_ex_eof_unterminated_paren_location) { const char* source = "(a + b"; - merve_error_loc loc{123, 456, 789}; + merve_error_loc loc{123, 456}; merve_analysis result = merve_parse_commonjs_ex(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); @@ -213,7 +209,6 @@ TEST(c_api_tests, parse_commonjs_ex_eof_unterminated_paren_location) { #if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 1u); ASSERT_EQ(loc.column, 7u); - ASSERT_EQ(loc.offset, 6u); #else ASSERT_TRUE(merve_error_loc_is_zero(loc)); #endif diff --git a/tests/real_world_tests.cpp b/tests/real_world_tests.cpp index 47513e6..34de06e 100644 --- 
a/tests/real_world_tests.cpp +++ b/tests/real_world_tests.cpp @@ -1005,7 +1005,6 @@ TEST(real_world_tests, error_location_state_resets_after_success) { ASSERT_TRUE(loc_after_error.has_value()); ASSERT_EQ(loc_after_error->line, 2u); ASSERT_EQ(loc_after_error->column, 3u); - ASSERT_EQ(loc_after_error->offset, 3u); #else ASSERT_FALSE(loc_after_error.has_value()); #endif @@ -1027,7 +1026,6 @@ TEST(real_world_tests, error_location_crlf_line_counting) { ASSERT_TRUE(loc.has_value()); ASSERT_EQ(loc->line, 2u); ASSERT_EQ(loc->column, 3u); - ASSERT_EQ(loc->offset, 4u); #else ASSERT_FALSE(loc.has_value()); #endif @@ -1044,7 +1042,6 @@ TEST(real_world_tests, error_location_import_meta_and_eof) { ASSERT_TRUE(import_meta_loc.has_value()); ASSERT_EQ(import_meta_loc->line, 2u); ASSERT_EQ(import_meta_loc->column, 3u); - ASSERT_EQ(import_meta_loc->offset, 3u); #else ASSERT_FALSE(import_meta_loc.has_value()); #endif @@ -1058,7 +1055,6 @@ TEST(real_world_tests, error_location_import_meta_and_eof) { ASSERT_TRUE(eof_loc.has_value()); ASSERT_EQ(eof_loc->line, 1u); ASSERT_EQ(eof_loc->column, 7u); - ASSERT_EQ(eof_loc->offset, 6u); #else ASSERT_FALSE(eof_loc.has_value()); #endif From 611a8dbf2ebaa68dd01d629455a635e2a269eb89 Mon Sep 17 00:00:00 2001 From: cijiugechu Date: Sat, 28 Feb 2026 10:35:15 +0800 Subject: [PATCH 4/6] refactor: simplify error location API Expose error line/column via an optional out parameter on merve_parse_commonjs and remove the separate *_ex entrypoint. Always track locations and drop the CMake/Rust feature gates, updating docs, tests, and CI accordingly. 
--- .github/workflows/ci.yml | 19 +-------------- README.md | 13 ++++------- cmake/lexer-flags.cmake | 1 - include/merve/parser.h | 3 +-- include/merve_c.h | 33 +++++++++++--------------- rust/Cargo.toml | 2 -- rust/README.md | 11 ++------- rust/build.rs | 4 ---- rust/deps/merve.cpp | 47 ++++++++++++------------------------- rust/deps/merve.h | 3 +-- rust/deps/merve_c.h | 33 +++++++++++--------------- rust/src/ffi.rs | 5 +--- rust/src/lib.rs | 30 +++++++----------------- src/CMakeLists.txt | 4 ---- src/merve_c.cpp | 8 ++----- src/parser.cpp | 6 ----- tests/c_api_compile_test.c | 6 ++--- tests/c_api_tests.cpp | 48 +++++++++++--------------------------- tests/real_world_tests.cpp | 16 ------------- 19 files changed, 80 insertions(+), 212 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c6da1a3..475671d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,12 +91,6 @@ jobs: cmake_gen: Ninja cmake_flags: "-DMERVE_USE_SIMDUTF=ON -DMERVE_SANITIZE=ON" name_suffix: " (simdutf, ASAN)" - # Ubuntu with error location tracking - - os: ubuntu-22.04 - cxx: g++-12 - cmake_gen: Ninja - cmake_flags: "-DMERVE_ENABLE_ERROR_LOCATION=ON" - name_suffix: " (error-location)" name: ${{ matrix.os }} ${{ matrix.cxx || 'MSVC' }}${{ matrix.name_suffix || '' }} runs-on: ${{ matrix.os }} @@ -170,22 +164,11 @@ jobs: working-directory: rust run: cargo clippy -- -D warnings - - name: Clippy (error-location) - working-directory: rust - run: cargo clippy --features error-location -- -D warnings - - name: Test working-directory: rust run: cargo test + - name: Test (no default features) working-directory: rust run: cargo test --no-default-features - - - name: Test (error-location) - working-directory: rust - run: cargo test --features error-location - - - name: Test (no default features + error-location) - working-directory: rust - run: cargo test --no-default-features --features error-location diff --git a/README.md b/README.md index 
b5ece57..01fb992 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ const std::optional& get_last_error_location(); ``` Returns the location of the last parse error, if available. Location tracking -is enabled when built with `MERVE_ENABLE_ERROR_LOCATION`. +is best-effort and may be unavailable. ### `lexer::error_location` @@ -165,8 +165,7 @@ int main(void) { const char* source = "exports.foo = 1;\nexports.bar = 2;\n"; merve_error_loc err_loc = {0, 0}; - merve_analysis result = merve_parse_commonjs_ex( - source, strlen(source), &err_loc); + merve_analysis result = merve_parse_commonjs(source, strlen(source), &err_loc); if (merve_is_valid(result)) { size_t count = merve_get_exports_count(result); @@ -210,8 +209,7 @@ Found 2 exports: | Function | Description | |----------|-------------| -| `merve_parse_commonjs(input, length)` | Parse CommonJS source. Returns a handle (NULL only on OOM). | -| `merve_parse_commonjs_ex(input, length, out_err)` | Parse CommonJS source and optionally fill error location. | +| `merve_parse_commonjs(input, length, out_err)` | Parse CommonJS source and optionally fill error location. Returns a handle (NULL only on OOM). | | `merve_is_valid(result)` | Check if parsing succeeded. NULL-safe. | | `merve_free(result)` | Free a parse result. NULL-safe. | | `merve_get_exports_count(result)` | Number of named exports found. | @@ -224,8 +222,8 @@ Found 2 exports: | `merve_get_version()` | Version string (e.g. `"1.0.1"`). | | `merve_get_version_components()` | Version as `{major, minor, revision}`. | -Build with `-DMERVE_ENABLE_ERROR_LOCATION=ON` to enable non-zero locations -from `merve_parse_commonjs_ex`. +On parse failure, `merve_parse_commonjs` writes a non-zero location when +`out_err` is non-NULL and the location is available. 
#### Error Constants @@ -373,7 +371,6 @@ ctest --test-dir build | `MERVE_TESTING` | `ON` | Build test suite | | `MERVE_BENCHMARKS` | `OFF` | Build benchmarks | | `MERVE_USE_SIMDUTF` | `OFF` | Use simdutf for optimized string operations | -| `MERVE_ENABLE_ERROR_LOCATION` | `OFF` | Track parse error source locations | | `MERVE_SANITIZE` | `OFF` | Enable address sanitizer | ### Building with simdutf diff --git a/cmake/lexer-flags.cmake b/cmake/lexer-flags.cmake index d1aafc7..9ad92c8 100644 --- a/cmake/lexer-flags.cmake +++ b/cmake/lexer-flags.cmake @@ -1,7 +1,6 @@ option(MERVE_LOGGING "verbose output (useful for debugging)" OFF) option(MERVE_SANITIZE "Sanitize addresses" OFF) option(MERVE_USE_SIMDUTF "Use simdutf for optimized string operations" OFF) -option(MERVE_ENABLE_ERROR_LOCATION "Track parse error source locations" OFF) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") option(MERVE_SANITIZE_BOUNDS_STRICT "Sanitize bounds (strict): only for GCC" OFF) endif() diff --git a/include/merve/parser.h b/include/merve/parser.h index 270fd0c..6458872 100644 --- a/include/merve/parser.h +++ b/include/merve/parser.h @@ -166,8 +166,7 @@ const std::optional& get_last_error(); * * @note This is global state and may be overwritten by subsequent calls * to parse_commonjs(). - * @note Location tracking is enabled when built with - * MERVE_ENABLE_ERROR_LOCATION. + * @note Location tracking is best-effort and may be unavailable. */ const std::optional& get_last_error_location(); diff --git a/include/merve_c.h b/include/merve_c.h index f3fa42e..035ce21 100644 --- a/include/merve_c.h +++ b/include/merve_c.h @@ -72,39 +72,32 @@ extern "C" { #endif /** - * Parse CommonJS source code and extract export information. + * Parse CommonJS source code and optionally return error location. * * The source buffer must remain valid while accessing string_view-backed * export names from the returned handle. * - * You must call merve_free() on the returned handle when done. 
- * - * @param input Pointer to the JavaScript source (need not be null-terminated). - * NULL is treated as an empty string. - * @param length Length of the input in bytes. - * @return A handle to the parse result, or NULL on out-of-memory. - * Use merve_is_valid() to check if parsing succeeded. - */ -merve_analysis merve_parse_commonjs(const char* input, size_t length); - -/** - * Parse CommonJS source code and optionally return error location. - * - * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always - * written: + * If @p out_err is non-NULL, it is always written: * - On success: set to {0, 0}. * - On parse failure with known location: set to that location. * - On parse failure without available location: set to {0, 0}. * + * You must call merve_free() on the returned handle when done. + * * @param input Pointer to the JavaScript source (need not be - * null-terminated). NULL is treated as an empty string. + * null-terminated). NULL is treated as an empty string. * @param length Length of the input in bytes. * @param out_err Optional output pointer for parse error location. * @return A handle to the parse result, or NULL on out-of-memory. * Use merve_is_valid() to check if parsing succeeded. */ -merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, - merve_error_loc* out_err); +#ifdef __cplusplus +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err = nullptr); +#else +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err); +#endif /** * Check whether the parse result is valid (parsing succeeded). 
@@ -197,7 +190,7 @@ const char* merve_get_version(void); merve_version_components merve_get_version_components(void); #ifdef __cplusplus -} /* extern "C" */ +} /* extern "C" */ #endif #endif /* MERVE_C_H */ diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 91140e6..41a7d78 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -26,8 +26,6 @@ default = ["std"] libcpp = [] # enable allocations std = [] -# enable parse error location support -error-location = [] [package.metadata.docs.rs] rustdoc-args = ["--cfg", "docsrs"] diff --git a/rust/README.md b/rust/README.md index 134cc51..0decf1b 100644 --- a/rust/README.md +++ b/rust/README.md @@ -46,13 +46,6 @@ Requires `libc++` to be installed: merve = { version = "...", features = ["libcpp"] } ``` -**error-location**: Enables location-aware parse errors via -`parse_commonjs_with_location`. - -```toml -merve = { version = "...", features = ["error-location"] } -``` - ## API ### `parse_commonjs` @@ -65,7 +58,7 @@ Parse CommonJS source code and extract export information. The returned `Analysis` borrows from `source` because export names may point directly into the source buffer (zero-copy). -### `parse_commonjs_with_location` (`error-location` feature) +### `parse_commonjs_with_location` ```rust pub fn parse_commonjs_with_location( @@ -118,7 +111,7 @@ Returned when the input contains ESM syntax or malformed constructs: `LexerError` implements `Display` and, with the `std` feature, `std::error::Error`. 
-### `LocatedLexerError` (`error-location` feature) +### `LocatedLexerError` ```rust pub struct LocatedLexerError { diff --git a/rust/build.rs b/rust/build.rs index 48d5402..74db0a9 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -219,10 +219,6 @@ fn main() { build.include(&deps); build.cpp(true).std("c++20").warnings(false); - if env::var_os("CARGO_FEATURE_ERROR_LOCATION").is_some() { - build.define("MERVE_ENABLE_ERROR_LOCATION", Some("1")); - } - // Target handling let target_str = env::var("TARGET").unwrap(); let target: Vec = target_str.split('-').map(|s| s.into()).collect(); diff --git a/rust/deps/merve.cpp b/rust/deps/merve.cpp index de1b3dd..2f7e846 100644 --- a/rust/deps/merve.cpp +++ b/rust/deps/merve.cpp @@ -316,7 +316,6 @@ struct StarExportBinding { thread_local std::optional last_error; thread_local std::optional last_error_location; -#ifdef MERVE_ENABLE_ERROR_LOCATION static error_location makeErrorLocation(const char* source, const char* end, const char* at) { const char* target = at; if (target < source) target = source; @@ -349,7 +348,6 @@ static error_location makeErrorLocation(const char* source, const char* end, con loc.column = column; return loc; } -#endif // Lexer state class class CJSLexer { @@ -525,12 +523,8 @@ class CJSLexer { void syntaxError(lexer_error code, const char* at = nullptr) { if (!last_error) { last_error = code; -#ifdef MERVE_ENABLE_ERROR_LOCATION const char* error_pos = at ? at : pos; last_error_location = makeErrorLocation(source, end, error_pos); -#else - (void)at; -#endif } pos = end + 1; } @@ -1896,39 +1890,32 @@ extern "C" { #endif /** - * Parse CommonJS source code and extract export information. + * Parse CommonJS source code and optionally return error location. * * The source buffer must remain valid while accessing string_view-backed * export names from the returned handle. * - * You must call merve_free() on the returned handle when done. 
- * - * @param input Pointer to the JavaScript source (need not be null-terminated). - * NULL is treated as an empty string. - * @param length Length of the input in bytes. - * @return A handle to the parse result, or NULL on out-of-memory. - * Use merve_is_valid() to check if parsing succeeded. - */ -merve_analysis merve_parse_commonjs(const char* input, size_t length); - -/** - * Parse CommonJS source code and optionally return error location. - * - * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always - * written: + * If @p out_err is non-NULL, it is always written: * - On success: set to {0, 0}. * - On parse failure with known location: set to that location. * - On parse failure without available location: set to {0, 0}. * + * You must call merve_free() on the returned handle when done. + * * @param input Pointer to the JavaScript source (need not be - * null-terminated). NULL is treated as an empty string. + * null-terminated). NULL is treated as an empty string. * @param length Length of the input in bytes. * @param out_err Optional output pointer for parse error location. * @return A handle to the parse result, or NULL on out-of-memory. * Use merve_is_valid() to check if parsing succeeded. */ -merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, - merve_error_loc* out_err); +#ifdef __cplusplus +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err = nullptr); +#else +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err); +#endif /** * Check whether the parse result is valid (parsing succeeded). 
@@ -2021,7 +2008,7 @@ const char* merve_get_version(void); merve_version_components merve_get_version_components(void); #ifdef __cplusplus -} /* extern "C" */ +} /* extern "C" */ #endif #endif /* MERVE_C_H */ @@ -2055,12 +2042,8 @@ static void merve_error_loc_set(merve_error_loc* out_err, extern "C" { -merve_analysis merve_parse_commonjs(const char* input, size_t length) { - return merve_parse_commonjs_ex(input, length, nullptr); -} - -merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, - merve_error_loc* out_err) { +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err) { merve_error_loc_clear(out_err); merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); diff --git a/rust/deps/merve.h b/rust/deps/merve.h index bf9b40e..e37bcef 100644 --- a/rust/deps/merve.h +++ b/rust/deps/merve.h @@ -192,8 +192,7 @@ const std::optional& get_last_error(); * * @note This is global state and may be overwritten by subsequent calls * to parse_commonjs(). - * @note Location tracking is enabled when built with - * MERVE_ENABLE_ERROR_LOCATION. + * @note Location tracking is best-effort and may be unavailable. */ const std::optional& get_last_error_location(); diff --git a/rust/deps/merve_c.h b/rust/deps/merve_c.h index f3fa42e..035ce21 100644 --- a/rust/deps/merve_c.h +++ b/rust/deps/merve_c.h @@ -72,39 +72,32 @@ extern "C" { #endif /** - * Parse CommonJS source code and extract export information. + * Parse CommonJS source code and optionally return error location. * * The source buffer must remain valid while accessing string_view-backed * export names from the returned handle. * - * You must call merve_free() on the returned handle when done. - * - * @param input Pointer to the JavaScript source (need not be null-terminated). - * NULL is treated as an empty string. - * @param length Length of the input in bytes. - * @return A handle to the parse result, or NULL on out-of-memory. 
- * Use merve_is_valid() to check if parsing succeeded. - */ -merve_analysis merve_parse_commonjs(const char* input, size_t length); - -/** - * Parse CommonJS source code and optionally return error location. - * - * Behaves like merve_parse_commonjs(). If @p out_err is non-NULL, it is always - * written: + * If @p out_err is non-NULL, it is always written: * - On success: set to {0, 0}. * - On parse failure with known location: set to that location. * - On parse failure without available location: set to {0, 0}. * + * You must call merve_free() on the returned handle when done. + * * @param input Pointer to the JavaScript source (need not be - * null-terminated). NULL is treated as an empty string. + * null-terminated). NULL is treated as an empty string. * @param length Length of the input in bytes. * @param out_err Optional output pointer for parse error location. * @return A handle to the parse result, or NULL on out-of-memory. * Use merve_is_valid() to check if parsing succeeded. */ -merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, - merve_error_loc* out_err); +#ifdef __cplusplus +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err = nullptr); +#else +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err); +#endif /** * Check whether the parse result is valid (parsing succeeded). 
@@ -197,7 +190,7 @@ const char* merve_get_version(void); merve_version_components merve_get_version_components(void); #ifdef __cplusplus -} /* extern "C" */ +} /* extern "C" */ #endif #endif /* MERVE_C_H */ diff --git a/rust/src/ffi.rs b/rust/src/ffi.rs index 37d9b96..ab7148f 100644 --- a/rust/src/ffi.rs +++ b/rust/src/ffi.rs @@ -25,7 +25,6 @@ pub struct merve_version_components { pub revision: c_int, } -#[cfg(feature = "error-location")] #[repr(C)] pub struct merve_error_loc { pub line: u32, @@ -33,9 +32,7 @@ pub struct merve_error_loc { } unsafe extern "C" { - pub fn merve_parse_commonjs(input: *const c_char, length: usize) -> merve_analysis; - #[cfg(feature = "error-location")] - pub fn merve_parse_commonjs_ex( + pub fn merve_parse_commonjs( input: *const c_char, length: usize, out_err: *mut merve_error_loc, diff --git a/rust/src/lib.rs b/rust/src/lib.rs index e11e23e..cd46270 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -113,7 +113,6 @@ impl fmt::Display for LexerError { impl std::error::Error for LexerError {} /// 1-based error position. -#[cfg(feature = "error-location")] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct ErrorLocation { /// 1-based line number. @@ -122,7 +121,6 @@ pub struct ErrorLocation { pub column: NonZeroU32, } -#[cfg(feature = "error-location")] impl ErrorLocation { #[inline] fn from_ffi(loc: ffi::merve_error_loc) -> Option { @@ -134,7 +132,6 @@ impl ErrorLocation { } /// Lexer error with optional source location. -#[cfg(feature = "error-location")] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct LocatedLexerError { /// Error kind reported by the lexer. 
@@ -143,7 +140,6 @@ pub struct LocatedLexerError { pub location: Option, } -#[cfg(feature = "error-location")] impl LocatedLexerError { #[inline] fn from_code_and_loc(code: i32, loc: ffi::merve_error_loc) -> Self { @@ -159,7 +155,6 @@ impl LocatedLexerError { } } -#[cfg(feature = "error-location")] impl fmt::Display for LocatedLexerError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(loc) = self.location { @@ -174,7 +169,7 @@ impl fmt::Display for LocatedLexerError { } } -#[cfg(all(feature = "std", feature = "error-location"))] +#[cfg(feature = "std")] impl std::error::Error for LocatedLexerError {} /// A parsed CommonJS analysis result. @@ -385,8 +380,7 @@ impl ExactSizeIterator for ExportIter<'_, '_> {} /// Returns a [`LexerError`] if the input contains ESM syntax or other /// unsupported constructs. /// -/// Enable the `error-location` feature to use -/// [`parse_commonjs_with_location`] for location-aware errors. +/// Use [`parse_commonjs_with_location`] for location-aware errors. /// /// # Examples /// @@ -418,7 +412,9 @@ pub fn parse_commonjs(source: &str) -> Result, LexerError> { if source.is_empty() { return Err(LexerError::EmptySource); } - let handle = unsafe { ffi::merve_parse_commonjs(source.as_ptr().cast(), source.len()) }; + let handle = unsafe { + ffi::merve_parse_commonjs(source.as_ptr().cast(), source.len(), core::ptr::null_mut()) + }; if handle.is_null() { // NULL means allocation failure; map to a generic error let code = unsafe { ffi::merve_get_last_error() }; @@ -446,13 +442,9 @@ pub fn parse_commonjs(source: &str) -> Result, LexerError> { /// Parse CommonJS source and return location-aware errors. /// -/// This API is available with the `error-location` feature. -/// /// # Errors /// -/// Returns [`LocatedLexerError`] on parse failure. Location data is optional -/// and depends on the underlying library build configuration. -#[cfg(feature = "error-location")] +/// Returns [`LocatedLexerError`] on parse failure. 
Location data is optional. pub fn parse_commonjs_with_location(source: &str) -> Result, LocatedLexerError> { if source.is_empty() { return Err(LocatedLexerError { @@ -467,7 +459,7 @@ pub fn parse_commonjs_with_location(source: &str) -> Result, Locate let mut loc = ffi::merve_error_loc { line: 0, column: 0 }; let handle = - unsafe { ffi::merve_parse_commonjs_ex(source.as_ptr().cast(), source.len(), &mut loc) }; + unsafe { ffi::merve_parse_commonjs(source.as_ptr().cast(), source.len(), &mut loc) }; if handle.is_null() { let code = unsafe { ffi::merve_get_last_error() }; return Err(LocatedLexerError::from_code_and_loc(code, loc)); @@ -584,7 +576,6 @@ mod tests { assert_eq!(result.unwrap_err(), LexerError::EmptySource); } - #[cfg(feature = "error-location")] #[test] fn parse_with_location_reports_error_position() { let source = "\n import 'x';"; @@ -598,7 +589,6 @@ mod tests { assert_eq!(loc.column, NonZeroU32::new(3).unwrap()); } - #[cfg(feature = "error-location")] #[test] fn parse_with_location_empty_source() { let result = parse_commonjs_with_location(""); @@ -612,7 +602,6 @@ mod tests { assert_eq!(loc.column, NonZeroU32::new(1).unwrap()); } - #[cfg(feature = "error-location")] #[test] fn parse_with_location_crlf_position() { let source = "\r\n import 'x';"; @@ -626,7 +615,6 @@ mod tests { assert_eq!(loc.column, NonZeroU32::new(3).unwrap()); } - #[cfg(feature = "error-location")] #[test] fn parse_with_location_import_meta_and_eof() { let import_meta = parse_commonjs_with_location("\n import.meta.url"); @@ -734,7 +722,7 @@ mod tests { assert!(s.contains("99"), "got: {s}"); } - #[cfg(all(feature = "std", feature = "error-location"))] + #[cfg(feature = "std")] #[test] fn located_error_display_includes_location() { let err = LocatedLexerError { @@ -766,7 +754,7 @@ mod tests { assert_error::(); } - #[cfg(all(feature = "std", feature = "error-location"))] + #[cfg(feature = "std")] #[test] fn located_error_is_std_error() { fn assert_error() {} diff --git 
a/src/CMakeLists.txt b/src/CMakeLists.txt index 83b9843..4156149 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -60,7 +60,3 @@ if(MERVE_USE_SIMDUTF) target_link_libraries(merve PRIVATE simdutf) target_compile_definitions(merve PRIVATE MERVE_USE_SIMDUTF=1) endif() - -if(MERVE_ENABLE_ERROR_LOCATION) - target_compile_definitions(merve PUBLIC MERVE_ENABLE_ERROR_LOCATION=1) -endif() diff --git a/src/merve_c.cpp b/src/merve_c.cpp index 420462e..f05e460 100644 --- a/src/merve_c.cpp +++ b/src/merve_c.cpp @@ -29,12 +29,8 @@ static void merve_error_loc_set(merve_error_loc* out_err, extern "C" { -merve_analysis merve_parse_commonjs(const char* input, size_t length) { - return merve_parse_commonjs_ex(input, length, nullptr); -} - -merve_analysis merve_parse_commonjs_ex(const char* input, size_t length, - merve_error_loc* out_err) { +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err) { merve_error_loc_clear(out_err); merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); diff --git a/src/parser.cpp b/src/parser.cpp index 1594e46..3f87e8f 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -314,7 +314,6 @@ struct StarExportBinding { thread_local std::optional last_error; thread_local std::optional last_error_location; -#ifdef MERVE_ENABLE_ERROR_LOCATION static error_location makeErrorLocation(const char* source, const char* end, const char* at) { const char* target = at; if (target < source) target = source; @@ -347,7 +346,6 @@ static error_location makeErrorLocation(const char* source, const char* end, con loc.column = column; return loc; } -#endif // Lexer state class class CJSLexer { @@ -523,12 +521,8 @@ class CJSLexer { void syntaxError(lexer_error code, const char* at = nullptr) { if (!last_error) { last_error = code; -#ifdef MERVE_ENABLE_ERROR_LOCATION const char* error_pos = at ? 
at : pos; last_error_location = makeErrorLocation(source, end, error_pos); -#else - (void)at; -#endif } pos = end + 1; } diff --git a/tests/c_api_compile_test.c b/tests/c_api_compile_test.c index ca98843..e9419de 100644 --- a/tests/c_api_compile_test.c +++ b/tests/c_api_compile_test.c @@ -28,9 +28,9 @@ static void check_types(void) { loc.column = 0; (void)loc; - merve_analysis (*parse_ex)(const char*, size_t, merve_error_loc*) = - &merve_parse_commonjs_ex; - (void)parse_ex; + merve_analysis (*parse_fn)(const char*, size_t, merve_error_loc*) = + &merve_parse_commonjs; + (void)parse_fn; /* Verify the error constants are valid integer constant expressions. */ int errors[] = { diff --git a/tests/c_api_tests.cpp b/tests/c_api_tests.cpp index 616c775..ebc7386 100644 --- a/tests/c_api_tests.cpp +++ b/tests/c_api_tests.cpp @@ -110,116 +110,96 @@ TEST(c_api_tests, esm_import_error) { merve_free(result); } -TEST(c_api_tests, parse_commonjs_ex_success_clears_error_location) { +TEST(c_api_tests, parse_commonjs_success_clears_error_location) { const char* source = "exports.foo = 1;"; merve_error_loc loc{9, 9}; merve_analysis result = - merve_parse_commonjs_ex(source, std::strlen(source), &loc); + merve_parse_commonjs(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); ASSERT_TRUE(merve_is_valid(result)); ASSERT_TRUE(merve_error_loc_is_zero(loc)); merve_free(result); } -TEST(c_api_tests, parse_commonjs_ex_error_location) { +TEST(c_api_tests, parse_commonjs_error_location) { const char* source = "\n import 'x';"; merve_error_loc loc{123, 456}; merve_analysis result = - merve_parse_commonjs_ex(source, std::strlen(source), &loc); + merve_parse_commonjs(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); ASSERT_FALSE(merve_is_valid(result)); ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); -#if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 2u); ASSERT_EQ(loc.column, 3u); -#else - ASSERT_TRUE(merve_error_loc_is_zero(loc)); 
-#endif merve_free(result); } -TEST(c_api_tests, parse_commonjs_ex_error_location_crlf) { +TEST(c_api_tests, parse_commonjs_error_location_crlf) { const char* source = "\r\n import 'x';"; merve_error_loc loc{123, 456}; merve_analysis result = - merve_parse_commonjs_ex(source, std::strlen(source), &loc); + merve_parse_commonjs(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); ASSERT_FALSE(merve_is_valid(result)); ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); -#if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 2u); ASSERT_EQ(loc.column, 3u); -#else - ASSERT_TRUE(merve_error_loc_is_zero(loc)); -#endif merve_free(result); } -TEST(c_api_tests, parse_commonjs_ex_export_error_location) { +TEST(c_api_tests, parse_commonjs_export_error_location) { const char* source = "\n export { x };"; merve_error_loc loc{123, 456}; merve_analysis result = - merve_parse_commonjs_ex(source, std::strlen(source), &loc); + merve_parse_commonjs(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); ASSERT_FALSE(merve_is_valid(result)); ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_EXPORT); -#if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 2u); ASSERT_EQ(loc.column, 3u); -#else - ASSERT_TRUE(merve_error_loc_is_zero(loc)); -#endif merve_free(result); } -TEST(c_api_tests, parse_commonjs_ex_import_meta_error_location) { +TEST(c_api_tests, parse_commonjs_import_meta_error_location) { const char* source = "\n import.meta.url"; merve_error_loc loc{123, 456}; merve_analysis result = - merve_parse_commonjs_ex(source, std::strlen(source), &loc); + merve_parse_commonjs(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); ASSERT_FALSE(merve_is_valid(result)); ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META); -#if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 2u); ASSERT_EQ(loc.column, 3u); -#else - ASSERT_TRUE(merve_error_loc_is_zero(loc)); -#endif merve_free(result); } 
-TEST(c_api_tests, parse_commonjs_ex_eof_unterminated_paren_location) { +TEST(c_api_tests, parse_commonjs_eof_unterminated_paren_location) { const char* source = "(a + b"; merve_error_loc loc{123, 456}; merve_analysis result = - merve_parse_commonjs_ex(source, std::strlen(source), &loc); + merve_parse_commonjs(source, std::strlen(source), &loc); ASSERT_NE(result, nullptr); ASSERT_FALSE(merve_is_valid(result)); ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNTERMINATED_PAREN); -#if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_EQ(loc.line, 1u); ASSERT_EQ(loc.column, 7u); -#else - ASSERT_TRUE(merve_error_loc_is_zero(loc)); -#endif merve_free(result); } -TEST(c_api_tests, parse_commonjs_ex_accepts_null_out_err) { +TEST(c_api_tests, parse_commonjs_accepts_null_out_err) { const char* source = "import 'x';"; merve_analysis result = - merve_parse_commonjs_ex(source, std::strlen(source), NULL); + merve_parse_commonjs(source, std::strlen(source), NULL); ASSERT_NE(result, nullptr); ASSERT_FALSE(merve_is_valid(result)); ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); diff --git a/tests/real_world_tests.cpp b/tests/real_world_tests.cpp index 34de06e..12a3bf7 100644 --- a/tests/real_world_tests.cpp +++ b/tests/real_world_tests.cpp @@ -1001,13 +1001,9 @@ TEST(real_world_tests, error_location_state_resets_after_success) { ASSERT_FALSE(failed.has_value()); auto loc_after_error = lexer::get_last_error_location(); -#if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_TRUE(loc_after_error.has_value()); ASSERT_EQ(loc_after_error->line, 2u); ASSERT_EQ(loc_after_error->column, 3u); -#else - ASSERT_FALSE(loc_after_error.has_value()); -#endif auto ok = lexer::parse_commonjs("exports.ok = 1;"); ASSERT_TRUE(ok.has_value()); @@ -1022,13 +1018,9 @@ TEST(real_world_tests, error_location_crlf_line_counting) { ASSERT_EQ(err, lexer::lexer_error::UNEXPECTED_ESM_IMPORT); auto loc = lexer::get_last_error_location(); -#if defined(MERVE_ENABLE_ERROR_LOCATION) 
ASSERT_TRUE(loc.has_value()); ASSERT_EQ(loc->line, 2u); ASSERT_EQ(loc->column, 3u); -#else - ASSERT_FALSE(loc.has_value()); -#endif } TEST(real_world_tests, error_location_import_meta_and_eof) { @@ -1038,26 +1030,18 @@ TEST(real_world_tests, error_location_import_meta_and_eof) { lexer::lexer_error::UNEXPECTED_ESM_IMPORT_META); auto import_meta_loc = lexer::get_last_error_location(); -#if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_TRUE(import_meta_loc.has_value()); ASSERT_EQ(import_meta_loc->line, 2u); ASSERT_EQ(import_meta_loc->column, 3u); -#else - ASSERT_FALSE(import_meta_loc.has_value()); -#endif auto eof_unterminated = lexer::parse_commonjs("(a + b"); ASSERT_FALSE(eof_unterminated.has_value()); ASSERT_EQ(lexer::get_last_error(), lexer::lexer_error::UNTERMINATED_PAREN); auto eof_loc = lexer::get_last_error_location(); -#if defined(MERVE_ENABLE_ERROR_LOCATION) ASSERT_TRUE(eof_loc.has_value()); ASSERT_EQ(eof_loc->line, 1u); ASSERT_EQ(eof_loc->column, 7u); -#else - ASSERT_FALSE(eof_loc.has_value()); -#endif } TEST(real_world_tests, unicode_escape_sequences) { From b0271a18a4526cd57de717325dd4ec034ee91631 Mon Sep 17 00:00:00 2001 From: cijiugechu Date: Sat, 28 Feb 2026 11:13:10 +0800 Subject: [PATCH 5/6] refactor(rust): unify parse API with located errors Drop the separate parse_commonjs_with_location entrypoint and make parse_commonjs return LocatedLexerError, so callers always get kind plus optional line/column. --- rust/README.md | 13 +------- rust/src/lib.rs | 82 +++++++++++-------------------------------------- 2 files changed, 19 insertions(+), 76 deletions(-) diff --git a/rust/README.md b/rust/README.md index 0decf1b..088bb55 100644 --- a/rust/README.md +++ b/rust/README.md @@ -51,24 +51,13 @@ merve = { version = "...", features = ["libcpp"] } ### `parse_commonjs` ```rust -pub fn parse_commonjs(source: &str) -> Result, LexerError> +pub fn parse_commonjs(source: &str) -> Result, LocatedLexerError> ``` Parse CommonJS source code and extract export information. 
The returned `Analysis` borrows from `source` because export names may point directly into the source buffer (zero-copy). -### `parse_commonjs_with_location` - -```rust -pub fn parse_commonjs_with_location( - source: &str, -) -> Result, LocatedLexerError> -``` - -Like `parse_commonjs`, but returns a `LocatedLexerError` that includes -`kind: LexerError` plus optional location (`line`, `column`). - ### `Analysis<'a>` | Method | Returns | Description | diff --git a/rust/src/lib.rs b/rust/src/lib.rs index cd46270..841fe01 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -377,11 +377,9 @@ impl ExactSizeIterator for ExportIter<'_, '_> {} /// /// # Errors /// -/// Returns a [`LexerError`] if the input contains ESM syntax or other +/// Returns a [`LocatedLexerError`] if the input contains ESM syntax or other /// unsupported constructs. /// -/// Use [`parse_commonjs_with_location`] for location-aware errors. -/// /// # Examples /// /// ``` @@ -408,44 +406,7 @@ impl ExactSizeIterator for ExportIter<'_, '_> {} /// let _ = leaked; /// } /// ``` -pub fn parse_commonjs(source: &str) -> Result, LexerError> { - if source.is_empty() { - return Err(LexerError::EmptySource); - } - let handle = unsafe { - ffi::merve_parse_commonjs(source.as_ptr().cast(), source.len(), core::ptr::null_mut()) - }; - if handle.is_null() { - // NULL means allocation failure; map to a generic error - let code = unsafe { ffi::merve_get_last_error() }; - return Err(if code >= 0 { - LexerError::from_code(code) - } else { - LexerError::Unknown(code) - }); - } - if !unsafe { ffi::merve_is_valid(handle) } { - let code = unsafe { ffi::merve_get_last_error() }; - let err = if code >= 0 { - LexerError::from_code(code) - } else { - LexerError::Unknown(code) - }; - unsafe { ffi::merve_free(handle) }; - return Err(err); - } - Ok(Analysis { - handle, - _source: PhantomData, - }) -} - -/// Parse CommonJS source and return location-aware errors. 
-/// -/// # Errors -/// -/// Returns [`LocatedLexerError`] on parse failure. Location data is optional. -pub fn parse_commonjs_with_location(source: &str) -> Result, LocatedLexerError> { +pub fn parse_commonjs(source: &str) -> Result, LocatedLexerError> { if source.is_empty() { return Err(LocatedLexerError { kind: LexerError::EmptySource, @@ -557,7 +518,7 @@ mod tests { let result = parse_commonjs(source); assert!(result.is_err()); let err = result.unwrap_err(); - assert_eq!(err, LexerError::UnexpectedEsmImport); + assert_eq!(err.kind, LexerError::UnexpectedEsmImport); } #[test] @@ -566,20 +527,26 @@ mod tests { let result = parse_commonjs(source); assert!(result.is_err()); let err = result.unwrap_err(); - assert_eq!(err, LexerError::UnexpectedEsmExport); + assert_eq!(err.kind, LexerError::UnexpectedEsmExport); } #[test] fn empty_input() { let result = parse_commonjs(""); assert!(result.is_err()); - assert_eq!(result.unwrap_err(), LexerError::EmptySource); + let err = result.unwrap_err(); + assert_eq!(err.kind, LexerError::EmptySource); + let loc = err + .location + .expect("empty source location should be present"); + assert_eq!(loc.line, NonZeroU32::new(1).unwrap()); + assert_eq!(loc.column, NonZeroU32::new(1).unwrap()); } #[test] - fn parse_with_location_reports_error_position() { + fn parse_reports_error_position() { let source = "\n import 'x';"; - let result = parse_commonjs_with_location(source); + let result = parse_commonjs(source); assert!(result.is_err()); let err = result.unwrap_err(); @@ -590,22 +557,9 @@ mod tests { } #[test] - fn parse_with_location_empty_source() { - let result = parse_commonjs_with_location(""); - assert!(result.is_err()); - let err = result.unwrap_err(); - assert_eq!(err.kind, LexerError::EmptySource); - let loc = err - .location - .expect("empty source location should be present"); - assert_eq!(loc.line, NonZeroU32::new(1).unwrap()); - assert_eq!(loc.column, NonZeroU32::new(1).unwrap()); - } - - #[test] - fn 
parse_with_location_crlf_position() { + fn parse_crlf_position() { let source = "\r\n import 'x';"; - let result = parse_commonjs_with_location(source); + let result = parse_commonjs(source); assert!(result.is_err()); let err = result.unwrap_err(); @@ -616,8 +570,8 @@ mod tests { } #[test] - fn parse_with_location_import_meta_and_eof() { - let import_meta = parse_commonjs_with_location("\n import.meta.url"); + fn parse_import_meta_and_eof() { + let import_meta = parse_commonjs("\n import.meta.url"); assert!(import_meta.is_err()); let import_meta_err = import_meta.unwrap_err(); assert_eq!(import_meta_err.kind, LexerError::UnexpectedEsmImportMeta); @@ -627,7 +581,7 @@ mod tests { assert_eq!(import_meta_loc.line, NonZeroU32::new(2).unwrap()); assert_eq!(import_meta_loc.column, NonZeroU32::new(3).unwrap()); - let eof = parse_commonjs_with_location("(a + b"); + let eof = parse_commonjs("(a + b"); assert!(eof.is_err()); let eof_err = eof.unwrap_err(); assert_eq!(eof_err.kind, LexerError::UnterminatedParen); From 87eecebdd65d0de648e6b647ac05dc949d4e0096 Mon Sep 17 00:00:00 2001 From: cijiugechu Date: Sat, 28 Feb 2026 13:29:33 +0800 Subject: [PATCH 6/6] build(rust): split vendored C API source Stop amalgamating merve_c.cpp into rust/deps/merve.cpp; vendor it as rust/deps/merve_c.cpp and compile both translation units. This keeps merve_error_loc helpers in the C API layer and matches the upstream file layout. 
--- rust/build.rs | 38 ++--- rust/deps/merve.cpp | 338 ------------------------------------------ rust/deps/merve_c.cpp | 129 ++++++++++++++++ src/merve_c.cpp | 14 +- 4 files changed, 155 insertions(+), 364 deletions(-) create mode 100644 rust/deps/merve_c.cpp diff --git a/rust/build.rs b/rust/build.rs index 74db0a9..e691882 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -133,8 +133,8 @@ fn amalgamate_file( out.push_str(&format!("/* end file {filename} */\n")); } -/// When building inside the merve repository, produce the three amalgamated -/// files in `deps/`: merve.h, merve.cpp, merve_c.h. +/// When building inside the merve repository, produce the vendored +/// sources in `deps/`: merve.h, merve.cpp, merve_c.cpp, merve_c.h. fn amalgamate_from_repo(project_root: &Path, deps: &Path) { let include_path = project_root.join("include"); let source_path = project_root.join("src"); @@ -159,21 +159,23 @@ fn amalgamate_from_repo(project_root: &Path, deps: &Path) { ); fs::write(deps.join("merve.h"), &header).expect("failed to write deps/merve.h"); - // 2. Amalgamate merve.cpp (parser.cpp + merve_c.cpp with includes resolved). + // 2. Amalgamate merve.cpp (parser.cpp with includes resolved). let mut source = String::from("#include \"merve.h\"\n\n"); - for cpp in &["parser.cpp", "merve_c.cpp"] { - amalgamate_file( - &include_path, - &source_path, - &source_path, - cpp, - &mut source, - &mut included, - ); - } + amalgamate_file( + &include_path, + &source_path, + &source_path, + "parser.cpp", + &mut source, + &mut included, + ); fs::write(deps.join("merve.cpp"), &source).expect("failed to write deps/merve.cpp"); - // 3. Copy merve_c.h verbatim (standalone C header). + // 3. Copy merve_c.cpp verbatim (C API implementation). + fs::copy(source_path.join("merve_c.cpp"), deps.join("merve_c.cpp")) + .expect("failed to copy merve_c.cpp"); + + // 4. Copy merve_c.h verbatim (standalone C header). 
fs::copy(include_path.join("merve_c.h"), deps.join("merve_c.h")) .expect("failed to copy merve_c.h"); } @@ -207,15 +209,17 @@ fn main() { } } - // Both in-repo and published crate use the same layout: merve.cpp + merve.h + merve_c.h + // Both in-repo and published crate use the same layout: + // merve.cpp + merve_c.cpp + merve.h + merve_c.h assert!( - deps.join("merve.cpp").exists(), + deps.join("merve.cpp").exists() && deps.join("merve_c.cpp").exists(), "No C++ sources found in deps/. \ - When building outside the repository, deps/ must contain the amalgamated sources." + When building outside the repository, deps/ must contain the vendored sources." ); let mut build = cc::Build::new(); build.file(deps.join("merve.cpp")); + build.file(deps.join("merve_c.cpp")); build.include(&deps); build.cpp(true).std("c++20").warnings(false); diff --git a/rust/deps/merve.cpp b/rust/deps/merve.cpp index 2f7e846..7df6f39 100644 --- a/rust/deps/merve.cpp +++ b/rust/deps/merve.cpp @@ -1807,341 +1807,3 @@ const std::optional& get_last_error_location() { } // namespace lexer /* end file parser.cpp */ -/* begin file merve_c.cpp */ -/* begin file merve.h */ -#ifndef MERVE_H -#define MERVE_H - - -#endif // MERVE_H -/* end file merve.h */ -/* begin file merve_c.h */ -/** - * @file merve_c.h - * @brief Includes the C definitions for merve. This is a C file, not C++. - */ -#ifndef MERVE_C_H -#define MERVE_C_H - -#include -#include -#include - -/** - * @brief Non-owning string reference. - * - * The data pointer is NOT null-terminated. Always use the length field. - * - * The data is valid as long as: - * - The merve_analysis handle that produced it has not been freed. - * - For string_view-backed exports: the original source buffer is alive. - */ -typedef struct { - const char* data; - size_t length; -} merve_string; - -/** - * @brief Opaque handle to a CommonJS parse result. - * - * Created by merve_parse_commonjs(). Must be freed with merve_free(). 
- */ -typedef void* merve_analysis; - -/** - * @brief Version number components. - */ -typedef struct { - int major; - int minor; - int revision; -} merve_version_components; - -/** - * @brief Source location for a parse error. - * - * - line and column are 1-based. - * - column is byte-oriented. - * - * A zeroed location (`{0, 0}`) means the location is unavailable. - */ -typedef struct { - uint32_t line; - uint32_t column; -} merve_error_loc; - -/* Error codes corresponding to lexer::lexer_error values. */ -#define MERVE_ERROR_TODO 0 -#define MERVE_ERROR_UNEXPECTED_PAREN 1 -#define MERVE_ERROR_UNEXPECTED_BRACE 2 -#define MERVE_ERROR_UNTERMINATED_PAREN 3 -#define MERVE_ERROR_UNTERMINATED_BRACE 4 -#define MERVE_ERROR_UNTERMINATED_TEMPLATE_STRING 5 -#define MERVE_ERROR_UNTERMINATED_STRING_LITERAL 6 -#define MERVE_ERROR_UNTERMINATED_REGEX_CHARACTER_CLASS 7 -#define MERVE_ERROR_UNTERMINATED_REGEX 8 -#define MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META 9 -#define MERVE_ERROR_UNEXPECTED_ESM_IMPORT 10 -#define MERVE_ERROR_UNEXPECTED_ESM_EXPORT 11 -#define MERVE_ERROR_TEMPLATE_NEST_OVERFLOW 12 - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Parse CommonJS source code and optionally return error location. - * - * The source buffer must remain valid while accessing string_view-backed - * export names from the returned handle. - * - * If @p out_err is non-NULL, it is always written: - * - On success: set to {0, 0}. - * - On parse failure with known location: set to that location. - * - On parse failure without available location: set to {0, 0}. - * - * You must call merve_free() on the returned handle when done. - * - * @param input Pointer to the JavaScript source (need not be - * null-terminated). NULL is treated as an empty string. - * @param length Length of the input in bytes. - * @param out_err Optional output pointer for parse error location. - * @return A handle to the parse result, or NULL on out-of-memory. - * Use merve_is_valid() to check if parsing succeeded. 
- */ -#ifdef __cplusplus -merve_analysis merve_parse_commonjs(const char* input, size_t length, - merve_error_loc* out_err = nullptr); -#else -merve_analysis merve_parse_commonjs(const char* input, size_t length, - merve_error_loc* out_err); -#endif - -/** - * Check whether the parse result is valid (parsing succeeded). - * - * @param result Handle returned by merve_parse_commonjs(). NULL returns false. - * @return true if parsing succeeded, false otherwise. - */ -bool merve_is_valid(merve_analysis result); - -/** - * Free a parse result and all associated memory. - * - * @param result Handle returned by merve_parse_commonjs(). NULL is a no-op. - */ -void merve_free(merve_analysis result); - -/** - * Get the number of named exports found. - * - * @param result A parse result handle. NULL returns 0. - * @return Number of exports, or 0 if result is NULL or invalid. - */ -size_t merve_get_exports_count(merve_analysis result); - -/** - * Get the number of re-export module specifiers found. - * - * @param result A parse result handle. NULL returns 0. - * @return Number of re-exports, or 0 if result is NULL or invalid. - */ -size_t merve_get_reexports_count(merve_analysis result); - -/** - * Get the name of an export at the given index. - * - * @param result A valid parse result handle. - * @param index Zero-based index (must be < merve_get_exports_count()). - * @return Non-owning string reference. Returns {NULL, 0} on error. - */ -merve_string merve_get_export_name(merve_analysis result, size_t index); - -/** - * Get the 1-based source line number of an export. - * - * @param result A valid parse result handle. - * @param index Zero-based index (must be < merve_get_exports_count()). - * @return 1-based line number, or 0 on error. - */ -uint32_t merve_get_export_line(merve_analysis result, size_t index); - -/** - * Get the module specifier of a re-export at the given index. - * - * @param result A valid parse result handle. 
- * @param index Zero-based index (must be < merve_get_reexports_count()). - * @return Non-owning string reference. Returns {NULL, 0} on error. - */ -merve_string merve_get_reexport_name(merve_analysis result, size_t index); - -/** - * Get the 1-based source line number of a re-export. - * - * @param result A valid parse result handle. - * @param index Zero-based index (must be < merve_get_reexports_count()). - * @return 1-based line number, or 0 on error. - */ -uint32_t merve_get_reexport_line(merve_analysis result, size_t index); - -/** - * Get the error code from the last merve_parse_commonjs() call. - * - * @return One of the MERVE_ERROR_* constants, or -1 if the last parse - * succeeded. - * @note This is global state, overwritten by each merve_parse_commonjs() call. - */ -int merve_get_last_error(void); - -/** - * Get the merve library version string. - * - * @return Null-terminated version string (e.g. "1.0.1"). Never NULL. - */ -const char* merve_get_version(void); - -/** - * Get the merve library version as individual components. - * - * @return Struct with major, minor, and revision fields. 
- */ -merve_version_components merve_get_version_components(void); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* MERVE_C_H */ -/* end file merve_c.h */ - -#include - -struct merve_analysis_impl { - std::optional result{}; -}; - -static merve_string merve_string_create(const char* data, size_t length) { - merve_string out{}; - out.data = data; - out.length = length; - return out; -} - -static void merve_error_loc_clear(merve_error_loc* out_err) { - if (!out_err) return; - out_err->line = 0; - out_err->column = 0; -} - -static void merve_error_loc_set(merve_error_loc* out_err, - const lexer::error_location& loc) { - if (!out_err) return; - out_err->line = loc.line; - out_err->column = loc.column; -} - -extern "C" { - -merve_analysis merve_parse_commonjs(const char* input, size_t length, - merve_error_loc* out_err) { - merve_error_loc_clear(out_err); - - merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); - if (!impl) return nullptr; - if (input != nullptr) { - impl->result = lexer::parse_commonjs(std::string_view(input, length)); - } else { - impl->result = lexer::parse_commonjs(std::string_view("", 0)); - } - - if (!impl->result.has_value() && out_err) { - const std::optional& err_loc = - lexer::get_last_error_location(); - if (err_loc.has_value()) { - merve_error_loc_set(out_err, err_loc.value()); - } - } - - return static_cast(impl); -} - -bool merve_is_valid(merve_analysis result) { - if (!result) return false; - return static_cast(result)->result.has_value(); -} - -void merve_free(merve_analysis result) { - if (!result) return; - delete static_cast(result); -} - -size_t merve_get_exports_count(merve_analysis result) { - if (!result) return 0; - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return 0; - return impl->result->exports.size(); -} - -size_t merve_get_reexports_count(merve_analysis result) { - if (!result) return 0; - merve_analysis_impl* impl = static_cast(result); - if 
(!impl->result.has_value()) return 0; - return impl->result->re_exports.size(); -} - -merve_string merve_get_export_name(merve_analysis result, size_t index) { - if (!result) return merve_string_create(nullptr, 0); - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return merve_string_create(nullptr, 0); - if (index >= impl->result->exports.size()) - return merve_string_create(nullptr, 0); - std::string_view sv = - lexer::get_string_view(impl->result->exports[index]); - return merve_string_create(sv.data(), sv.size()); -} - -uint32_t merve_get_export_line(merve_analysis result, size_t index) { - if (!result) return 0; - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return 0; - if (index >= impl->result->exports.size()) return 0; - return impl->result->exports[index].line; -} - -merve_string merve_get_reexport_name(merve_analysis result, size_t index) { - if (!result) return merve_string_create(nullptr, 0); - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return merve_string_create(nullptr, 0); - if (index >= impl->result->re_exports.size()) - return merve_string_create(nullptr, 0); - std::string_view sv = - lexer::get_string_view(impl->result->re_exports[index]); - return merve_string_create(sv.data(), sv.size()); -} - -uint32_t merve_get_reexport_line(merve_analysis result, size_t index) { - if (!result) return 0; - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return 0; - if (index >= impl->result->re_exports.size()) return 0; - return impl->result->re_exports[index].line; -} - -int merve_get_last_error(void) { - const std::optional& err = lexer::get_last_error(); - if (!err.has_value()) return -1; - return static_cast(err.value()); -} - -const char* merve_get_version(void) { return MERVE_VERSION; } - -merve_version_components merve_get_version_components(void) { - merve_version_components vc{}; - vc.major = 
lexer::MERVE_VERSION_MAJOR; - vc.minor = lexer::MERVE_VERSION_MINOR; - vc.revision = lexer::MERVE_VERSION_REVISION; - return vc; -} - -} /* extern "C" */ -/* end file merve_c.cpp */ diff --git a/rust/deps/merve_c.cpp b/rust/deps/merve_c.cpp new file mode 100644 index 0000000..894509b --- /dev/null +++ b/rust/deps/merve_c.cpp @@ -0,0 +1,129 @@ +#include "merve.h" +#include "merve_c.h" + +#include + +struct merve_analysis_impl { + std::optional result{}; +}; + +static merve_string merve_string_create(const char* data, size_t length) { + merve_string out{}; + out.data = data; + out.length = length; + return out; +} + +static void merve_error_loc_clear(merve_error_loc* out_err) { + if (!out_err) return; + out_err->line = 0; + out_err->column = 0; +} + +static void merve_error_loc_set(merve_error_loc* out_err, + const lexer::error_location& loc) { + if (!out_err) return; + out_err->line = loc.line; + out_err->column = loc.column; +} + +extern "C" { + +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err) { + merve_error_loc_clear(out_err); + + merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); + if (!impl) return nullptr; + if (input != nullptr) { + impl->result = lexer::parse_commonjs(std::string_view(input, length)); + } else { + impl->result = lexer::parse_commonjs(std::string_view("", 0)); + } + + if (!impl->result.has_value() && out_err) { + if (const auto& err_loc = lexer::get_last_error_location()) { + merve_error_loc_set(out_err, *err_loc); + } + } + + return static_cast(impl); +} + +bool merve_is_valid(merve_analysis result) { + if (!result) return false; + return static_cast(result)->result.has_value(); +} + +void merve_free(merve_analysis result) { + if (!result) return; + delete static_cast(result); +} + +size_t merve_get_exports_count(merve_analysis result) { + if (!result) return 0; + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return 0; + return 
impl->result->exports.size(); +} + +size_t merve_get_reexports_count(merve_analysis result) { + if (!result) return 0; + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return 0; + return impl->result->re_exports.size(); +} + +merve_string merve_get_export_name(merve_analysis result, size_t index) { + if (!result) return merve_string_create(nullptr, 0); + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return merve_string_create(nullptr, 0); + if (index >= impl->result->exports.size()) + return merve_string_create(nullptr, 0); + std::string_view sv = lexer::get_string_view(impl->result->exports[index]); + return merve_string_create(sv.data(), sv.size()); +} + +uint32_t merve_get_export_line(merve_analysis result, size_t index) { + if (!result) return 0; + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return 0; + if (index >= impl->result->exports.size()) return 0; + return impl->result->exports[index].line; +} + +merve_string merve_get_reexport_name(merve_analysis result, size_t index) { + if (!result) return merve_string_create(nullptr, 0); + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return merve_string_create(nullptr, 0); + if (index >= impl->result->re_exports.size()) + return merve_string_create(nullptr, 0); + std::string_view sv = lexer::get_string_view(impl->result->re_exports[index]); + return merve_string_create(sv.data(), sv.size()); +} + +uint32_t merve_get_reexport_line(merve_analysis result, size_t index) { + if (!result) return 0; + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return 0; + if (index >= impl->result->re_exports.size()) return 0; + return impl->result->re_exports[index].line; +} + +int merve_get_last_error(void) { + const std::optional& err = lexer::get_last_error(); + if (!err.has_value()) return -1; + return static_cast(err.value()); +} + +const char* 
merve_get_version(void) { return MERVE_VERSION; } + +merve_version_components merve_get_version_components(void) { + merve_version_components vc{}; + vc.major = lexer::MERVE_VERSION_MAJOR; + vc.minor = lexer::MERVE_VERSION_MINOR; + vc.revision = lexer::MERVE_VERSION_REVISION; + return vc; +} + +} /* extern "C" */ diff --git a/src/merve_c.cpp b/src/merve_c.cpp index f05e460..894509b 100644 --- a/src/merve_c.cpp +++ b/src/merve_c.cpp @@ -42,10 +42,8 @@ merve_analysis merve_parse_commonjs(const char* input, size_t length, } if (!impl->result.has_value() && out_err) { - const std::optional& err_loc = - lexer::get_last_error_location(); - if (err_loc.has_value()) { - merve_error_loc_set(out_err, err_loc.value()); + if (const auto& err_loc = lexer::get_last_error_location()) { + merve_error_loc_set(out_err, *err_loc); } } @@ -82,8 +80,7 @@ merve_string merve_get_export_name(merve_analysis result, size_t index) { if (!impl->result.has_value()) return merve_string_create(nullptr, 0); if (index >= impl->result->exports.size()) return merve_string_create(nullptr, 0); - std::string_view sv = - lexer::get_string_view(impl->result->exports[index]); + std::string_view sv = lexer::get_string_view(impl->result->exports[index]); return merve_string_create(sv.data(), sv.size()); } @@ -101,8 +98,7 @@ merve_string merve_get_reexport_name(merve_analysis result, size_t index) { if (!impl->result.has_value()) return merve_string_create(nullptr, 0); if (index >= impl->result->re_exports.size()) return merve_string_create(nullptr, 0); - std::string_view sv = - lexer::get_string_view(impl->result->re_exports[index]); + std::string_view sv = lexer::get_string_view(impl->result->re_exports[index]); return merve_string_create(sv.data(), sv.size()); } @@ -130,4 +126,4 @@ merve_version_components merve_get_version_components(void) { return vc; } -} /* extern "C" */ +} /* extern "C" */