diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b13885b..475671d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -168,6 +168,7 @@ jobs: working-directory: rust run: cargo test + - name: Test (no default features) working-directory: rust run: cargo test --no-default-features diff --git a/README.md b/README.md index 5d8ac88..01fb992 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,24 @@ const std::optional& get_last_error(); Returns the last parse error, if any. +### `lexer::get_last_error_location` + +```cpp +const std::optional& get_last_error_location(); +``` + +Returns the location of the last parse error, if available. Location tracking +is best-effort and may be unavailable. + +### `lexer::error_location` + +```cpp +struct error_location { + uint32_t line; // 1-based + uint32_t column; // 1-based (byte-oriented) +}; +``` + ## C API merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or any language that can call C functions. The C API is compiled into the merve library alongside the C++ implementation. @@ -141,11 +159,13 @@ merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or a ```c #include "merve_c.h" #include +#include int main(void) { const char* source = "exports.foo = 1;\nexports.bar = 2;\n"; - merve_analysis result = merve_parse_commonjs(source, strlen(source)); + merve_error_loc err_loc = {0, 0}; + merve_analysis result = merve_parse_commonjs(source, strlen(source), &err_loc); if (merve_is_valid(result)) { size_t count = merve_get_exports_count(result); @@ -157,6 +177,9 @@ int main(void) { } } else { printf("Parse error: %d\n", merve_get_last_error()); + if (err_loc.line != 0) { + printf(" at line %u, column %u\n", err_loc.line, err_loc.column); + } } merve_free(result); @@ -180,12 +203,13 @@ Found 2 exports: | `merve_string` | Non-owning string reference (`data` + `length`). Not null-terminated. | | `merve_analysis` | Opaque handle to a parse result. Must be freed with `merve_free()`. | | `merve_version_components` | Struct with `major`, `minor`, `revision` fields. | +| `merve_error_loc` | Error location (`line`, `column`). `{0,0}` means unavailable. | #### Functions | Function | Description | |----------|-------------| -| `merve_parse_commonjs(input, length)` | Parse CommonJS source. Returns a handle (NULL only on OOM). | +| `merve_parse_commonjs(input, length, out_err)` | Parse CommonJS source and optionally fill error location. Returns a handle (NULL only on OOM). | | `merve_is_valid(result)` | Check if parsing succeeded. NULL-safe. | | `merve_free(result)` | Free a parse result. NULL-safe. | | `merve_get_exports_count(result)` | Number of named exports found. | @@ -198,6 +222,9 @@ Found 2 exports: | `merve_get_version()` | Version string (e.g. `"1.0.1"`). | | `merve_get_version_components()` | Version as `{major, minor, revision}`. | +On parse failure, `merve_parse_commonjs` writes a non-zero location when +`out_err` is non-NULL and the location is available. + #### Error Constants | Constant | Value | Description | diff --git a/include/merve/parser.h b/include/merve/parser.h index 8d1ff42..6458872 100644 --- a/include/merve/parser.h +++ b/include/merve/parser.h @@ -3,6 +3,7 @@ #include "merve/version.h" +#include #include #include #include @@ -37,6 +38,17 @@ enum lexer_error { TEMPLATE_NEST_OVERFLOW, ///< Template literal nesting too deep }; +/** + * @brief Source location information for a parse error. + * + * - line and column are 1-based. + * - column is byte-oriented. + */ +struct error_location { + uint32_t line; + uint32_t column; +}; + /** * @brief Type alias for export names. * @@ -146,6 +158,18 @@ std::optional parse_commonjs(std::string_view file_contents); */ const std::optional& get_last_error(); +/** + * @brief Get the location of the last failed parse operation. + * + * @return const std::optional& The last error location, or + * std::nullopt if unavailable. + * + * @note This is global state and may be overwritten by subsequent calls + * to parse_commonjs(). + * @note Location tracking is best-effort and may be unavailable. + */ +const std::optional& get_last_error_location(); + } // namespace lexer #endif // MERVE_PARSER_H diff --git a/include/merve_c.h b/include/merve_c.h index af4a9d7..035ce21 100644 --- a/include/merve_c.h +++ b/include/merve_c.h @@ -39,6 +39,19 @@ typedef struct { int revision; } merve_version_components; +/** + * @brief Source location for a parse error. + * + * - line and column are 1-based. + * - column is byte-oriented. + * + * A zeroed location (`{0, 0}`) means the location is unavailable. + */ +typedef struct { + uint32_t line; + uint32_t column; +} merve_error_loc; + /* Error codes corresponding to lexer::lexer_error values. */ #define MERVE_ERROR_TODO 0 #define MERVE_ERROR_UNEXPECTED_PAREN 1 @@ -59,20 +72,32 @@ extern "C" { #endif /** - * Parse CommonJS source code and extract export information. + * Parse CommonJS source code and optionally return error location. * * The source buffer must remain valid while accessing string_view-backed * export names from the returned handle. * + * If @p out_err is non-NULL, it is always written: + * - On success: set to {0, 0}. + * - On parse failure with known location: set to that location. + * - On parse failure without available location: set to {0, 0}. + * * You must call merve_free() on the returned handle when done. * - * @param input Pointer to the JavaScript source (need not be null-terminated). - * NULL is treated as an empty string. - * @param length Length of the input in bytes. + * @param input Pointer to the JavaScript source (need not be + * null-terminated). NULL is treated as an empty string. + * @param length Length of the input in bytes. + * @param out_err Optional output pointer for parse error location. * @return A handle to the parse result, or NULL on out-of-memory. * Use merve_is_valid() to check if parsing succeeded. */ -merve_analysis merve_parse_commonjs(const char* input, size_t length); +#ifdef __cplusplus +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err = nullptr); +#else +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err); +#endif /** * Check whether the parse result is valid (parsing succeeded). @@ -165,7 +190,7 @@ const char* merve_get_version(void); merve_version_components merve_get_version_components(void); #ifdef __cplusplus -} /* extern "C" */ +} /* extern "C" */ #endif #endif /* MERVE_C_H */ diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 6f19756..f4781cb 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -77,7 +77,7 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "merve" -version = "1.1.2" +version = "1.1.3" dependencies = [ "cc", "link_args", diff --git a/rust/README.md b/rust/README.md index e41045e..088bb55 100644 --- a/rust/README.md +++ b/rust/README.md @@ -51,7 +51,7 @@ merve = { version = "...", features = ["libcpp"] } ### `parse_commonjs` ```rust -pub fn parse_commonjs(source: &str) -> Result, LexerError> +pub fn parse_commonjs(source: &str) -> Result, LocatedLexerError> ``` Parse CommonJS source code and extract export information. The returned @@ -100,6 +100,17 @@ Returned when the input contains ESM syntax or malformed constructs: `LexerError` implements `Display` and, with the `std` feature, `std::error::Error`. +### `LocatedLexerError` + +```rust +pub struct LocatedLexerError { + pub kind: LexerError, + pub location: Option, +} +``` + +`ErrorLocation` uses 1-based `line`/`column` (byte-oriented column). + ### Versioning helpers ```rust diff --git a/rust/build.rs b/rust/build.rs index 74db0a9..e691882 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -133,8 +133,8 @@ fn amalgamate_file( out.push_str(&format!("/* end file {filename} */\n")); } -/// When building inside the merve repository, produce the three amalgamated -/// files in `deps/`: merve.h, merve.cpp, merve_c.h. +/// When building inside the merve repository, produce the vendored +/// sources in `deps/`: merve.h, merve.cpp, merve_c.cpp, merve_c.h. fn amalgamate_from_repo(project_root: &Path, deps: &Path) { let include_path = project_root.join("include"); let source_path = project_root.join("src"); @@ -159,21 +159,23 @@ fn amalgamate_from_repo(project_root: &Path, deps: &Path) { ); fs::write(deps.join("merve.h"), &header).expect("failed to write deps/merve.h"); - // 2. Amalgamate merve.cpp (parser.cpp + merve_c.cpp with includes resolved). + // 2. Amalgamate merve.cpp (parser.cpp with includes resolved). let mut source = String::from("#include \"merve.h\"\n\n"); - for cpp in &["parser.cpp", "merve_c.cpp"] { - amalgamate_file( - &include_path, - &source_path, - &source_path, - cpp, - &mut source, - &mut included, - ); - } + amalgamate_file( + &include_path, + &source_path, + &source_path, + "parser.cpp", + &mut source, + &mut included, + ); fs::write(deps.join("merve.cpp"), &source).expect("failed to write deps/merve.cpp"); - // 3. Copy merve_c.h verbatim (standalone C header). + // 3. Copy merve_c.cpp verbatim (C API implementation). + fs::copy(source_path.join("merve_c.cpp"), deps.join("merve_c.cpp")) + .expect("failed to copy merve_c.cpp"); + + // 4. Copy merve_c.h verbatim (standalone C header). fs::copy(include_path.join("merve_c.h"), deps.join("merve_c.h")) .expect("failed to copy merve_c.h"); } @@ -207,15 +209,17 @@ fn main() { } } - // Both in-repo and published crate use the same layout: merve.cpp + merve.h + merve_c.h + // Both in-repo and published crate use the same layout: + // merve.cpp + merve_c.cpp + merve.h + merve_c.h assert!( - deps.join("merve.cpp").exists(), + deps.join("merve.cpp").exists() && deps.join("merve_c.cpp").exists(), "No C++ sources found in deps/. \ - When building outside the repository, deps/ must contain the amalgamated sources." + When building outside the repository, deps/ must contain the vendored sources." ); let mut build = cc::Build::new(); build.file(deps.join("merve.cpp")); + build.file(deps.join("merve_c.cpp")); build.include(&deps); build.cpp(true).std("c++20").warnings(false); diff --git a/rust/deps/merve.cpp b/rust/deps/merve.cpp index dbb39b3..7df6f39 100644 --- a/rust/deps/merve.cpp +++ b/rust/deps/merve.cpp @@ -314,6 +314,40 @@ struct StarExportBinding { // Thread-local state for error tracking (safe for concurrent parse calls). thread_local std::optional last_error; +thread_local std::optional last_error_location; + +static error_location makeErrorLocation(const char* source, const char* end, const char* at) { + const char* target = at; + if (target < source) target = source; + if (target > end) target = end; + + uint32_t line = 1; + uint32_t column = 1; + const char* cur = source; + + while (cur < target) { + const char ch = *cur++; + if (ch == '\n') { + line++; + column = 1; + continue; + } + if (ch == '\r') { + line++; + column = 1; + if (cur < target && *cur == '\n') { + cur++; + } + continue; + } + column++; + } + + error_location loc{}; + loc.line = line; + loc.column = column; + return loc; +} // Lexer state class class CJSLexer { @@ -334,6 +368,7 @@ class CJSLexer { std::array templateStack_; std::array openTokenPosStack_; + std::array openTokenTypeStack_; std::array openClassPosStack; std::array starExportStack_; StarExportBinding* starExportStack; @@ -485,9 +520,11 @@ class CJSLexer { } // Parsing utilities - void syntaxError(lexer_error code) { + void syntaxError(lexer_error code, const char* at = nullptr) { if (!last_error) { last_error = code; + const char* error_pos = at ? at : pos; + last_error_location = makeErrorLocation(source, end, error_pos); } pos = end + 1; } @@ -1490,6 +1527,7 @@ class CJSLexer { char ch = commentWhitespace(); switch (ch) { case '(': + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = startPos; return; case '.': @@ -1503,7 +1541,7 @@ class CJSLexer { // It's something like import.metaData, not import.meta return; } - syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT_META); + syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT_META, startPos); } return; default: @@ -1518,17 +1556,18 @@ class CJSLexer { pos--; return; } - syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT); + syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT, startPos); } } void throwIfExportStatement() { + const char* startPos = pos; pos += 6; const char* curPos = pos; char ch = commentWhitespace(); if (pos == curPos && !isPunctuator(ch)) return; - syntaxError(lexer_error::UNEXPECTED_ESM_EXPORT); + syntaxError(lexer_error::UNEXPECTED_ESM_EXPORT, startPos); } public: @@ -1537,7 +1576,7 @@ class CJSLexer { templateStackDepth(0), openTokenDepth(0), templateDepth(0), line(1), lastSlashWasDivision(false), nextBraceIsClass(false), - templateStack_{}, openTokenPosStack_{}, openClassPosStack{}, + templateStack_{}, openTokenPosStack_{}, openTokenTypeStack_{}, openClassPosStack{}, starExportStack_{}, starExportStack(nullptr), STAR_EXPORT_STACK_END(nullptr), exports(out_exports), re_exports(out_re_exports) {} @@ -1602,6 +1641,7 @@ class CJSLexer { pos += 23; if (*pos == '(') { pos++; + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; if (tryParseRequire(RequireType::Import) && keywordStart(startPos)) tryBacktrackAddStarExportBinding(startPos - 1); @@ -1611,6 +1651,7 @@ class CJSLexer { if (pos + 4 < end && matchesAt(pos, end, "Star")) pos += 4; if (*pos == '(') { + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; if (*(pos + 1) == 'r') { pos++; @@ -1645,6 +1686,7 @@ class CJSLexer { tryParseObjectDefineOrKeys(openTokenDepth == 0); break; case '(': + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; break; case ')': @@ -1657,6 +1699,7 @@ class CJSLexer { case '{': openClassPosStack[openTokenDepth] = nextBraceIsClass; nextBraceIsClass = false; + openTokenTypeStack_[openTokenDepth] = '{'; openTokenPosStack_[openTokenDepth++] = lastTokenPos; break; case '}': @@ -1719,6 +1762,19 @@ class CJSLexer { lastTokenPos = pos; } + if (!last_error) { + if (templateDepth != std::numeric_limits::max()) { + syntaxError(lexer_error::UNTERMINATED_TEMPLATE_STRING, end); + } else if (openTokenDepth != 0) { + const char open_ch = openTokenTypeStack_[openTokenDepth - 1]; + if (open_ch == '{') { + syntaxError(lexer_error::UNTERMINATED_BRACE, end); + } else { + syntaxError(lexer_error::UNTERMINATED_PAREN, end); + } + } + } + if (templateDepth != std::numeric_limits::max() || openTokenDepth || last_error) { return false; } @@ -1729,6 +1785,7 @@ class CJSLexer { std::optional parse_commonjs(std::string_view file_contents) { last_error.reset(); + last_error_location.reset(); lexer_analysis result; CJSLexer lexer(result.exports, result.re_exports); @@ -1744,293 +1801,9 @@ const std::optional& get_last_error() { return last_error; } -} // namespace lexer -/* end file parser.cpp */ -/* begin file merve_c.cpp */ -/* begin file merve.h */ -#ifndef MERVE_H -#define MERVE_H - - -#endif // MERVE_H -/* end file merve.h */ -/* begin file merve_c.h */ -/** - * @file merve_c.h - * @brief Includes the C definitions for merve. This is a C file, not C++. - */ -#ifndef MERVE_C_H -#define MERVE_C_H - -#include -#include -#include - -/** - * @brief Non-owning string reference. - * - * The data pointer is NOT null-terminated. Always use the length field. - * - * The data is valid as long as: - * - The merve_analysis handle that produced it has not been freed. - * - For string_view-backed exports: the original source buffer is alive. - */ -typedef struct { - const char* data; - size_t length; -} merve_string; - -/** - * @brief Opaque handle to a CommonJS parse result. - * - * Created by merve_parse_commonjs(). Must be freed with merve_free(). - */ -typedef void* merve_analysis; - -/** - * @brief Version number components. - */ -typedef struct { - int major; - int minor; - int revision; -} merve_version_components; - -/* Error codes corresponding to lexer::lexer_error values. */ -#define MERVE_ERROR_TODO 0 -#define MERVE_ERROR_UNEXPECTED_PAREN 1 -#define MERVE_ERROR_UNEXPECTED_BRACE 2 -#define MERVE_ERROR_UNTERMINATED_PAREN 3 -#define MERVE_ERROR_UNTERMINATED_BRACE 4 -#define MERVE_ERROR_UNTERMINATED_TEMPLATE_STRING 5 -#define MERVE_ERROR_UNTERMINATED_STRING_LITERAL 6 -#define MERVE_ERROR_UNTERMINATED_REGEX_CHARACTER_CLASS 7 -#define MERVE_ERROR_UNTERMINATED_REGEX 8 -#define MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META 9 -#define MERVE_ERROR_UNEXPECTED_ESM_IMPORT 10 -#define MERVE_ERROR_UNEXPECTED_ESM_EXPORT 11 -#define MERVE_ERROR_TEMPLATE_NEST_OVERFLOW 12 - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Parse CommonJS source code and extract export information. - * - * The source buffer must remain valid while accessing string_view-backed - * export names from the returned handle. - * - * You must call merve_free() on the returned handle when done. - * - * @param input Pointer to the JavaScript source (need not be null-terminated). - * NULL is treated as an empty string. - * @param length Length of the input in bytes. - * @return A handle to the parse result, or NULL on out-of-memory. - * Use merve_is_valid() to check if parsing succeeded. - */ -merve_analysis merve_parse_commonjs(const char* input, size_t length); - -/** - * Check whether the parse result is valid (parsing succeeded). - * - * @param result Handle returned by merve_parse_commonjs(). NULL returns false. - * @return true if parsing succeeded, false otherwise. - */ -bool merve_is_valid(merve_analysis result); - -/** - * Free a parse result and all associated memory. - * - * @param result Handle returned by merve_parse_commonjs(). NULL is a no-op. - */ -void merve_free(merve_analysis result); - -/** - * Get the number of named exports found. - * - * @param result A parse result handle. NULL returns 0. - * @return Number of exports, or 0 if result is NULL or invalid. - */ -size_t merve_get_exports_count(merve_analysis result); - -/** - * Get the number of re-export module specifiers found. - * - * @param result A parse result handle. NULL returns 0. - * @return Number of re-exports, or 0 if result is NULL or invalid. - */ -size_t merve_get_reexports_count(merve_analysis result); - -/** - * Get the name of an export at the given index. - * - * @param result A valid parse result handle. - * @param index Zero-based index (must be < merve_get_exports_count()). - * @return Non-owning string reference. Returns {NULL, 0} on error. - */ -merve_string merve_get_export_name(merve_analysis result, size_t index); - -/** - * Get the 1-based source line number of an export. - * - * @param result A valid parse result handle. - * @param index Zero-based index (must be < merve_get_exports_count()). - * @return 1-based line number, or 0 on error. - */ -uint32_t merve_get_export_line(merve_analysis result, size_t index); - -/** - * Get the module specifier of a re-export at the given index. - * - * @param result A valid parse result handle. - * @param index Zero-based index (must be < merve_get_reexports_count()). - * @return Non-owning string reference. Returns {NULL, 0} on error. - */ -merve_string merve_get_reexport_name(merve_analysis result, size_t index); - -/** - * Get the 1-based source line number of a re-export. - * - * @param result A valid parse result handle. - * @param index Zero-based index (must be < merve_get_reexports_count()). - * @return 1-based line number, or 0 on error. - */ -uint32_t merve_get_reexport_line(merve_analysis result, size_t index); - -/** - * Get the error code from the last merve_parse_commonjs() call. - * - * @return One of the MERVE_ERROR_* constants, or -1 if the last parse - * succeeded. - * @note This is global state, overwritten by each merve_parse_commonjs() call. - */ -int merve_get_last_error(void); - -/** - * Get the merve library version string. - * - * @return Null-terminated version string (e.g. "1.0.1"). Never NULL. - */ -const char* merve_get_version(void); - -/** - * Get the merve library version as individual components. - * - * @return Struct with major, minor, and revision fields. - */ -merve_version_components merve_get_version_components(void); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* MERVE_C_H */ -/* end file merve_c.h */ - -#include - -struct merve_analysis_impl { - std::optional result{}; -}; - -static merve_string merve_string_create(const char* data, size_t length) { - merve_string out{}; - out.data = data; - out.length = length; - return out; -} - -extern "C" { - -merve_analysis merve_parse_commonjs(const char* input, size_t length) { - merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); - if (!impl) return nullptr; - if (input != nullptr) { - impl->result = lexer::parse_commonjs(std::string_view(input, length)); - } else { - impl->result = lexer::parse_commonjs(std::string_view("", 0)); - } - return static_cast(impl); -} - -bool merve_is_valid(merve_analysis result) { - if (!result) return false; - return static_cast(result)->result.has_value(); -} - -void merve_free(merve_analysis result) { - if (!result) return; - delete static_cast(result); -} - -size_t merve_get_exports_count(merve_analysis result) { - if (!result) return 0; - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return 0; - return impl->result->exports.size(); -} - -size_t merve_get_reexports_count(merve_analysis result) { - if (!result) return 0; - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return 0; - return impl->result->re_exports.size(); -} - -merve_string merve_get_export_name(merve_analysis result, size_t index) { - if (!result) return merve_string_create(nullptr, 0); - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return merve_string_create(nullptr, 0); - if (index >= impl->result->exports.size()) - return merve_string_create(nullptr, 0); - std::string_view sv = - lexer::get_string_view(impl->result->exports[index]); - return merve_string_create(sv.data(), sv.size()); +const std::optional& get_last_error_location() { + return last_error_location; } -uint32_t merve_get_export_line(merve_analysis result, size_t index) { - if (!result) return 0; - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return 0; - if (index >= impl->result->exports.size()) return 0; - return impl->result->exports[index].line; -} - -merve_string merve_get_reexport_name(merve_analysis result, size_t index) { - if (!result) return merve_string_create(nullptr, 0); - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return merve_string_create(nullptr, 0); - if (index >= impl->result->re_exports.size()) - return merve_string_create(nullptr, 0); - std::string_view sv = - lexer::get_string_view(impl->result->re_exports[index]); - return merve_string_create(sv.data(), sv.size()); -} - -uint32_t merve_get_reexport_line(merve_analysis result, size_t index) { - if (!result) return 0; - merve_analysis_impl* impl = static_cast(result); - if (!impl->result.has_value()) return 0; - if (index >= impl->result->re_exports.size()) return 0; - return impl->result->re_exports[index].line; -} - -int merve_get_last_error(void) { - const std::optional& err = lexer::get_last_error(); - if (!err.has_value()) return -1; - return static_cast(err.value()); -} - -const char* merve_get_version(void) { return MERVE_VERSION; } - -merve_version_components merve_get_version_components(void) { - merve_version_components vc{}; - vc.major = lexer::MERVE_VERSION_MAJOR; - vc.minor = lexer::MERVE_VERSION_MINOR; - vc.revision = lexer::MERVE_VERSION_REVISION; - return vc; -} - -} /* extern "C" */ -/* end file merve_c.cpp */ +} // namespace lexer +/* end file parser.cpp */ diff --git a/rust/deps/merve.h b/rust/deps/merve.h index d30dec2..e37bcef 100644 --- a/rust/deps/merve.h +++ b/rust/deps/merve.h @@ -29,6 +29,7 @@ enum { #endif // MERVE_VERSION_H /* end file merve/version.h */ +#include #include #include #include @@ -63,6 +64,17 @@ enum lexer_error { TEMPLATE_NEST_OVERFLOW, ///< Template literal nesting too deep }; +/** + * @brief Source location information for a parse error. + * + * - line and column are 1-based. + * - column is byte-oriented. + */ +struct error_location { + uint32_t line; + uint32_t column; +}; + /** * @brief Type alias for export names. * @@ -172,6 +184,18 @@ std::optional parse_commonjs(std::string_view file_contents); */ const std::optional& get_last_error(); +/** + * @brief Get the location of the last failed parse operation. + * + * @return const std::optional& The last error location, or + * std::nullopt if unavailable. + * + * @note This is global state and may be overwritten by subsequent calls + * to parse_commonjs(). + * @note Location tracking is best-effort and may be unavailable. + */ +const std::optional& get_last_error_location(); + } // namespace lexer #endif // MERVE_PARSER_H diff --git a/rust/deps/merve_c.cpp b/rust/deps/merve_c.cpp new file mode 100644 index 0000000..894509b --- /dev/null +++ b/rust/deps/merve_c.cpp @@ -0,0 +1,129 @@ +#include "merve.h" +#include "merve_c.h" + +#include + +struct merve_analysis_impl { + std::optional result{}; +}; + +static merve_string merve_string_create(const char* data, size_t length) { + merve_string out{}; + out.data = data; + out.length = length; + return out; +} + +static void merve_error_loc_clear(merve_error_loc* out_err) { + if (!out_err) return; + out_err->line = 0; + out_err->column = 0; +} + +static void merve_error_loc_set(merve_error_loc* out_err, + const lexer::error_location& loc) { + if (!out_err) return; + out_err->line = loc.line; + out_err->column = loc.column; +} + +extern "C" { + +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err) { + merve_error_loc_clear(out_err); + + merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); + if (!impl) return nullptr; + if (input != nullptr) { + impl->result = lexer::parse_commonjs(std::string_view(input, length)); + } else { + impl->result = lexer::parse_commonjs(std::string_view("", 0)); + } + + if (!impl->result.has_value() && out_err) { + if (const auto& err_loc = lexer::get_last_error_location()) { + merve_error_loc_set(out_err, *err_loc); + } + } + + return static_cast(impl); +} + +bool merve_is_valid(merve_analysis result) { + if (!result) return false; + return static_cast(result)->result.has_value(); +} + +void merve_free(merve_analysis result) { + if (!result) return; + delete static_cast(result); +} + +size_t merve_get_exports_count(merve_analysis result) { + if (!result) return 0; + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return 0; + return impl->result->exports.size(); +} + +size_t merve_get_reexports_count(merve_analysis result) { + if (!result) return 0; + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return 0; + return impl->result->re_exports.size(); +} + +merve_string merve_get_export_name(merve_analysis result, size_t index) { + if (!result) return merve_string_create(nullptr, 0); + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return merve_string_create(nullptr, 0); + if (index >= impl->result->exports.size()) + return merve_string_create(nullptr, 0); + std::string_view sv = lexer::get_string_view(impl->result->exports[index]); + return merve_string_create(sv.data(), sv.size()); +} + +uint32_t merve_get_export_line(merve_analysis result, size_t index) { + if (!result) return 0; + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return 0; + if (index >= impl->result->exports.size()) return 0; + return impl->result->exports[index].line; +} + +merve_string merve_get_reexport_name(merve_analysis result, size_t index) { + if (!result) return merve_string_create(nullptr, 0); + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return merve_string_create(nullptr, 0); + if (index >= impl->result->re_exports.size()) + return merve_string_create(nullptr, 0); + std::string_view sv = lexer::get_string_view(impl->result->re_exports[index]); + return merve_string_create(sv.data(), sv.size()); +} + +uint32_t merve_get_reexport_line(merve_analysis result, size_t index) { + if (!result) return 0; + merve_analysis_impl* impl = static_cast(result); + if (!impl->result.has_value()) return 0; + if (index >= impl->result->re_exports.size()) return 0; + return impl->result->re_exports[index].line; +} + +int merve_get_last_error(void) { + const std::optional& err = lexer::get_last_error(); + if (!err.has_value()) return -1; + return static_cast(err.value()); +} + +const char* merve_get_version(void) { return MERVE_VERSION; } + +merve_version_components merve_get_version_components(void) { + merve_version_components vc{}; + vc.major = lexer::MERVE_VERSION_MAJOR; + vc.minor = lexer::MERVE_VERSION_MINOR; + vc.revision = lexer::MERVE_VERSION_REVISION; + return vc; +} + +} /* extern "C" */ diff --git a/rust/deps/merve_c.h b/rust/deps/merve_c.h index af4a9d7..035ce21 100644 --- a/rust/deps/merve_c.h +++ b/rust/deps/merve_c.h @@ -39,6 +39,19 @@ typedef struct { int revision; } merve_version_components; +/** + * @brief Source location for a parse error. + * + * - line and column are 1-based. + * - column is byte-oriented. + * + * A zeroed location (`{0, 0}`) means the location is unavailable. + */ +typedef struct { + uint32_t line; + uint32_t column; +} merve_error_loc; + /* Error codes corresponding to lexer::lexer_error values. */ #define MERVE_ERROR_TODO 0 #define MERVE_ERROR_UNEXPECTED_PAREN 1 @@ -59,20 +72,32 @@ extern "C" { #endif /** - * Parse CommonJS source code and extract export information. + * Parse CommonJS source code and optionally return error location. * * The source buffer must remain valid while accessing string_view-backed * export names from the returned handle. * + * If @p out_err is non-NULL, it is always written: + * - On success: set to {0, 0}. + * - On parse failure with known location: set to that location. + * - On parse failure without available location: set to {0, 0}. + * * You must call merve_free() on the returned handle when done. * - * @param input Pointer to the JavaScript source (need not be null-terminated). - * NULL is treated as an empty string. - * @param length Length of the input in bytes. + * @param input Pointer to the JavaScript source (need not be + * null-terminated). NULL is treated as an empty string. + * @param length Length of the input in bytes. + * @param out_err Optional output pointer for parse error location. * @return A handle to the parse result, or NULL on out-of-memory. * Use merve_is_valid() to check if parsing succeeded. */ -merve_analysis merve_parse_commonjs(const char* input, size_t length); +#ifdef __cplusplus +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err = nullptr); +#else +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err); +#endif /** * Check whether the parse result is valid (parsing succeeded). @@ -165,7 +190,7 @@ const char* merve_get_version(void); merve_version_components merve_get_version_components(void); #ifdef __cplusplus -} /* extern "C" */ +} /* extern "C" */ #endif #endif /* MERVE_C_H */ diff --git a/rust/src/ffi.rs b/rust/src/ffi.rs index a966915..ab7148f 100644 --- a/rust/src/ffi.rs +++ b/rust/src/ffi.rs @@ -25,8 +25,18 @@ pub struct merve_version_components { pub revision: c_int, } +#[repr(C)] +pub struct merve_error_loc { + pub line: u32, + pub column: u32, +} + unsafe extern "C" { - pub fn merve_parse_commonjs(input: *const c_char, length: usize) -> merve_analysis; + pub fn merve_parse_commonjs( + input: *const c_char, + length: usize, + out_err: *mut merve_error_loc, + ) -> merve_analysis; pub fn merve_is_valid(result: merve_analysis) -> bool; pub fn merve_free(result: merve_analysis); pub fn merve_get_exports_count(result: merve_analysis) -> usize; diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 0997598..841fe01 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -112,6 +112,66 @@ impl fmt::Display for LexerError { #[cfg(feature = "std")] impl std::error::Error for LexerError {} +/// 1-based error position. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ErrorLocation { + /// 1-based line number. + pub line: NonZeroU32, + /// 1-based column number (byte-oriented). + pub column: NonZeroU32, +} + +impl ErrorLocation { + #[inline] + fn from_ffi(loc: ffi::merve_error_loc) -> Option { + Some(Self { + line: NonZeroU32::new(loc.line)?, + column: NonZeroU32::new(loc.column)?, + }) + } +} + +/// Lexer error with optional source location. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LocatedLexerError { + /// Error kind reported by the lexer. + pub kind: LexerError, + /// Source location, if available. + pub location: Option, +} + +impl LocatedLexerError { + #[inline] + fn from_code_and_loc(code: i32, loc: ffi::merve_error_loc) -> Self { + let kind = if code >= 0 { + LexerError::from_code(code) + } else { + LexerError::Unknown(code) + }; + Self { + kind, + location: ErrorLocation::from_ffi(loc), + } + } +} + +impl fmt::Display for LocatedLexerError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(loc) = self.location { + write!( + f, + "{} at line {}, column {}", + self.kind, loc.line, loc.column + ) + } else { + write!(f, "{}", self.kind) + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for LocatedLexerError {} + /// A parsed CommonJS analysis result. /// /// The lifetime `'a` is tied to the source string passed to [`parse_commonjs`], @@ -317,7 +377,7 @@ impl ExactSizeIterator for ExportIter<'_, '_> {} /// /// # Errors /// -/// Returns a [`LexerError`] if the input contains ESM syntax or other +/// Returns a [`LocatedLexerError`] if the input contains ESM syntax or other /// unsupported constructs. /// /// # Examples @@ -346,30 +406,33 @@ impl ExactSizeIterator for ExportIter<'_, '_> {} /// let _ = leaked; /// } /// ``` -pub fn parse_commonjs(source: &str) -> Result, LexerError> { +pub fn parse_commonjs(source: &str) -> Result, LocatedLexerError> { if source.is_empty() { - return Err(LexerError::EmptySource); + return Err(LocatedLexerError { + kind: LexerError::EmptySource, + location: Some(ErrorLocation { + line: NonZeroU32::new(1).expect("1 is non-zero"), + column: NonZeroU32::new(1).expect("1 is non-zero"), + }), + }); } - let handle = unsafe { ffi::merve_parse_commonjs(source.as_ptr().cast(), source.len()) }; + + let mut loc = ffi::merve_error_loc { line: 0, column: 0 }; + + let handle = + unsafe { ffi::merve_parse_commonjs(source.as_ptr().cast(), source.len(), &mut loc) }; if handle.is_null() { - // NULL means allocation failure; map to a generic error let code = unsafe { ffi::merve_get_last_error() }; - return Err(if code >= 0 { - LexerError::from_code(code) - } else { - LexerError::Unknown(code) - }); + return Err(LocatedLexerError::from_code_and_loc(code, loc)); } + if !unsafe { ffi::merve_is_valid(handle) } { let code = unsafe { ffi::merve_get_last_error() }; - let err = if code >= 0 { - LexerError::from_code(code) - } else { - LexerError::Unknown(code) - }; + let err = LocatedLexerError::from_code_and_loc(code, loc); unsafe { ffi::merve_free(handle) }; return Err(err); } + Ok(Analysis { handle, _source: PhantomData, @@ -455,7 +518,7 @@ mod tests { let result = parse_commonjs(source); assert!(result.is_err()); let err = result.unwrap_err(); - assert_eq!(err, LexerError::UnexpectedEsmImport); + assert_eq!(err.kind, LexerError::UnexpectedEsmImport); } #[test] @@ -464,14 +527,69 @@ mod tests { let result = parse_commonjs(source); assert!(result.is_err()); let err = result.unwrap_err(); - assert_eq!(err, LexerError::UnexpectedEsmExport); + assert_eq!(err.kind, LexerError::UnexpectedEsmExport); } #[test] fn empty_input() { let result = parse_commonjs(""); assert!(result.is_err()); - assert_eq!(result.unwrap_err(), LexerError::EmptySource); + let err = result.unwrap_err(); + assert_eq!(err.kind, LexerError::EmptySource); + let loc = err + .location + .expect("empty source location should be present"); + assert_eq!(loc.line, NonZeroU32::new(1).unwrap()); + assert_eq!(loc.column, NonZeroU32::new(1).unwrap()); + } + + #[test] + fn parse_reports_error_position() { + let source = "\n import 'x';"; + let result = parse_commonjs(source); + assert!(result.is_err()); + + let err = result.unwrap_err(); + assert_eq!(err.kind, LexerError::UnexpectedEsmImport); + let loc = err.location.expect("location should be present"); + assert_eq!(loc.line, NonZeroU32::new(2).unwrap()); + assert_eq!(loc.column, NonZeroU32::new(3).unwrap()); + } + + #[test] + fn parse_crlf_position() { + let source = "\r\n import 'x';"; + let result = parse_commonjs(source); + assert!(result.is_err()); + + let err = result.unwrap_err(); + assert_eq!(err.kind, LexerError::UnexpectedEsmImport); + let loc = err.location.expect("location should be present"); + assert_eq!(loc.line, NonZeroU32::new(2).unwrap()); + assert_eq!(loc.column, NonZeroU32::new(3).unwrap()); + } + + #[test] + fn parse_import_meta_and_eof() { + let import_meta = parse_commonjs("\n import.meta.url"); + assert!(import_meta.is_err()); + let import_meta_err = import_meta.unwrap_err(); + assert_eq!(import_meta_err.kind, LexerError::UnexpectedEsmImportMeta); + let import_meta_loc = import_meta_err + .location + .expect("import.meta location should be present"); + assert_eq!(import_meta_loc.line, NonZeroU32::new(2).unwrap()); + assert_eq!(import_meta_loc.column, NonZeroU32::new(3).unwrap()); + + let eof = parse_commonjs("(a + b"); + assert!(eof.is_err()); + let eof_err = eof.unwrap_err(); + assert_eq!(eof_err.kind, LexerError::UnterminatedParen); + let eof_loc = eof_err + .location + .expect("unterminated paren location should be present"); + assert_eq!(eof_loc.line, NonZeroU32::new(1).unwrap()); + assert_eq!(eof_loc.column, NonZeroU32::new(7).unwrap()); } #[test] @@ -558,6 +676,22 @@ mod tests { assert!(s.contains("99"), "got: {s}"); } + #[cfg(feature = "std")] + #[test] + fn located_error_display_includes_location() { + let err = LocatedLexerError { + kind: LexerError::UnexpectedEsmImport, + location: Some(ErrorLocation { + line: NonZeroU32::new(2).unwrap(), + column: NonZeroU32::new(4).unwrap(), + }), + }; + let s = format!("{err}"); + assert!(s.contains("line 2"), "got: {s}"); + assert!(s.contains("column 4"), "got: {s}"); + assert!(!s.contains("offset"), "got: {s}"); + } + #[test] fn error_from_code_roundtrip() { for code in 0..=12 { @@ -574,6 +708,13 @@ mod tests { assert_error::(); } + #[cfg(feature = "std")] + #[test] + fn located_error_is_std_error() { + fn assert_error() {} + assert_error::(); + } + #[test] fn bracket_notation_exports() { let source = r#"exports["hello-world"] = 1;"#; diff --git a/src/merve_c.cpp b/src/merve_c.cpp index 1909fa3..894509b 100644 --- a/src/merve_c.cpp +++ b/src/merve_c.cpp @@ -14,9 +14,25 @@ static merve_string merve_string_create(const char* data, size_t length) { return out; } +static void merve_error_loc_clear(merve_error_loc* out_err) { + if (!out_err) return; + out_err->line = 0; + out_err->column = 0; +} + +static void merve_error_loc_set(merve_error_loc* out_err, + const lexer::error_location& loc) { + if (!out_err) return; + out_err->line = loc.line; + out_err->column = loc.column; +} + extern "C" { -merve_analysis merve_parse_commonjs(const char* input, size_t length) { +merve_analysis merve_parse_commonjs(const char* input, size_t length, + merve_error_loc* out_err) { + merve_error_loc_clear(out_err); + merve_analysis_impl* impl = new (std::nothrow) merve_analysis_impl(); if (!impl) return nullptr; if (input != nullptr) { @@ -24,6 +40,13 @@ merve_analysis merve_parse_commonjs(const char* input, size_t length) { } else { impl->result = lexer::parse_commonjs(std::string_view("", 0)); } + + if (!impl->result.has_value() && out_err) { + if (const auto& err_loc = lexer::get_last_error_location()) { + merve_error_loc_set(out_err, *err_loc); + } + } + return static_cast(impl); } @@ -57,8 +80,7 @@ merve_string merve_get_export_name(merve_analysis result, size_t index) { if (!impl->result.has_value()) return merve_string_create(nullptr, 0); if (index >= impl->result->exports.size()) return merve_string_create(nullptr, 0); - std::string_view sv = - lexer::get_string_view(impl->result->exports[index]); + std::string_view sv = lexer::get_string_view(impl->result->exports[index]); return merve_string_create(sv.data(), sv.size()); } @@ -76,8 +98,7 @@ merve_string merve_get_reexport_name(merve_analysis result, size_t index) { if (!impl->result.has_value()) return merve_string_create(nullptr, 0); if (index >= impl->result->re_exports.size()) return merve_string_create(nullptr, 0); - std::string_view sv = - lexer::get_string_view(impl->result->re_exports[index]); + std::string_view sv = lexer::get_string_view(impl->result->re_exports[index]); return merve_string_create(sv.data(), sv.size()); } @@ -105,4 +126,4 @@ merve_version_components merve_get_version_components(void) { return vc; } -} /* extern "C" */ +} /* extern "C" */ diff --git a/src/parser.cpp b/src/parser.cpp index c977db0..3f87e8f 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -312,6 +312,40 @@ struct StarExportBinding { // Thread-local state for error tracking (safe for concurrent parse calls). thread_local std::optional last_error; +thread_local std::optional last_error_location; + +static error_location makeErrorLocation(const char* source, const char* end, const char* at) { + const char* target = at; + if (target < source) target = source; + if (target > end) target = end; + + uint32_t line = 1; + uint32_t column = 1; + const char* cur = source; + + while (cur < target) { + const char ch = *cur++; + if (ch == '\n') { + line++; + column = 1; + continue; + } + if (ch == '\r') { + line++; + column = 1; + if (cur < target && *cur == '\n') { + cur++; + } + continue; + } + column++; + } + + error_location loc{}; + loc.line = line; + loc.column = column; + return loc; +} // Lexer state class class CJSLexer { @@ -332,6 +366,7 @@ class CJSLexer { std::array templateStack_; std::array openTokenPosStack_; + std::array openTokenTypeStack_; std::array openClassPosStack; std::array starExportStack_; StarExportBinding* starExportStack; @@ -483,9 +518,11 @@ class CJSLexer { } // Parsing utilities - void syntaxError(lexer_error code) { + void syntaxError(lexer_error code, const char* at = nullptr) { if (!last_error) { last_error = code; + const char* error_pos = at ? at : pos; + last_error_location = makeErrorLocation(source, end, error_pos); } pos = end + 1; } @@ -1488,6 +1525,7 @@ class CJSLexer { char ch = commentWhitespace(); switch (ch) { case '(': + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = startPos; return; case '.': @@ -1501,7 +1539,7 @@ class CJSLexer { // It's something like import.metaData, not import.meta return; } - syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT_META); + syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT_META, startPos); } return; default: @@ -1516,17 +1554,18 @@ class CJSLexer { pos--; return; } - syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT); + syntaxError(lexer_error::UNEXPECTED_ESM_IMPORT, startPos); } } void throwIfExportStatement() { + const char* startPos = pos; pos += 6; const char* curPos = pos; char ch = commentWhitespace(); if (pos == curPos && !isPunctuator(ch)) return; - syntaxError(lexer_error::UNEXPECTED_ESM_EXPORT); + syntaxError(lexer_error::UNEXPECTED_ESM_EXPORT, startPos); } public: @@ -1535,7 +1574,7 @@ class CJSLexer { templateStackDepth(0), openTokenDepth(0), templateDepth(0), line(1), lastSlashWasDivision(false), nextBraceIsClass(false), - templateStack_{}, openTokenPosStack_{}, openClassPosStack{}, + templateStack_{}, openTokenPosStack_{}, openTokenTypeStack_{}, openClassPosStack{}, starExportStack_{}, starExportStack(nullptr), STAR_EXPORT_STACK_END(nullptr), exports(out_exports), re_exports(out_re_exports) {} @@ -1600,6 +1639,7 @@ class CJSLexer { pos += 23; if (*pos == '(') { pos++; + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; if (tryParseRequire(RequireType::Import) && keywordStart(startPos)) tryBacktrackAddStarExportBinding(startPos - 1); @@ -1609,6 +1649,7 @@ class CJSLexer { if (pos + 4 < end && matchesAt(pos, end, "Star")) pos += 4; if (*pos == '(') { + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; if (*(pos + 1) == 'r') { pos++; @@ -1643,6 +1684,7 @@ class CJSLexer { tryParseObjectDefineOrKeys(openTokenDepth == 0); break; case '(': + openTokenTypeStack_[openTokenDepth] = '('; openTokenPosStack_[openTokenDepth++] = lastTokenPos; break; case ')': @@ -1655,6 +1697,7 @@ class CJSLexer { case '{': openClassPosStack[openTokenDepth] = nextBraceIsClass; nextBraceIsClass = false; + openTokenTypeStack_[openTokenDepth] = '{'; openTokenPosStack_[openTokenDepth++] = lastTokenPos; break; case '}': @@ -1717,6 +1760,19 @@ class CJSLexer { lastTokenPos = pos; } + if (!last_error) { + if (templateDepth != std::numeric_limits::max()) { + syntaxError(lexer_error::UNTERMINATED_TEMPLATE_STRING, end); + } else if (openTokenDepth != 0) { + const char open_ch = openTokenTypeStack_[openTokenDepth - 1]; + if (open_ch == '{') { + syntaxError(lexer_error::UNTERMINATED_BRACE, end); + } else { + syntaxError(lexer_error::UNTERMINATED_PAREN, end); + } + } + } + if (templateDepth != std::numeric_limits::max() || openTokenDepth || last_error) { return false; } @@ -1727,6 +1783,7 @@ class CJSLexer { std::optional parse_commonjs(std::string_view file_contents) { last_error.reset(); + last_error_location.reset(); lexer_analysis result; CJSLexer lexer(result.exports, result.re_exports); @@ -1742,4 +1799,8 @@ const std::optional& get_last_error() { return last_error; } +const std::optional& get_last_error_location() { + return last_error_location; +} + } // namespace lexer diff --git a/tests/c_api_compile_test.c b/tests/c_api_compile_test.c index 77caaa1..e9419de 100644 --- a/tests/c_api_compile_test.c +++ b/tests/c_api_compile_test.c @@ -23,6 +23,15 @@ static void check_types(void) { merve_analysis a = (merve_analysis)0; (void)a; + merve_error_loc loc; + loc.line = 0; + loc.column = 0; + (void)loc; + + merve_analysis (*parse_fn)(const char*, size_t, merve_error_loc*) = + &merve_parse_commonjs; + (void)parse_fn; + /* Verify the error constants are valid integer constant expressions. */ int errors[] = { MERVE_ERROR_TODO, diff --git a/tests/c_api_tests.cpp b/tests/c_api_tests.cpp index cdcd0e4..ebc7386 100644 --- a/tests/c_api_tests.cpp +++ b/tests/c_api_tests.cpp @@ -11,6 +11,10 @@ static bool merve_string_eq(merve_string s, const char* expected) { return std::memcmp(s.data, expected, expected_len) == 0; } +static bool merve_error_loc_is_zero(merve_error_loc loc) { + return loc.line == 0 && loc.column == 0; +} + TEST(c_api_tests, version_string) { const char* version = merve_get_version(); ASSERT_NE(version, nullptr); @@ -106,6 +110,102 @@ TEST(c_api_tests, esm_import_error) { merve_free(result); } +TEST(c_api_tests, parse_commonjs_success_clears_error_location) { + const char* source = "exports.foo = 1;"; + merve_error_loc loc{9, 9}; + merve_analysis result = + merve_parse_commonjs(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_TRUE(merve_is_valid(result)); + ASSERT_TRUE(merve_error_loc_is_zero(loc)); + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_error_location) { + const char* source = "\n import 'x';"; + merve_error_loc loc{123, 456}; + merve_analysis result = + merve_parse_commonjs(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); + + ASSERT_EQ(loc.line, 2u); + ASSERT_EQ(loc.column, 3u); + + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_error_location_crlf) { + const char* source = "\r\n import 'x';"; + merve_error_loc loc{123, 456}; + merve_analysis result = + merve_parse_commonjs(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); + + ASSERT_EQ(loc.line, 2u); + ASSERT_EQ(loc.column, 3u); + + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_export_error_location) { + const char* source = "\n export { x };"; + merve_error_loc loc{123, 456}; + merve_analysis result = + merve_parse_commonjs(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_EXPORT); + + ASSERT_EQ(loc.line, 2u); + ASSERT_EQ(loc.column, 3u); + + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_import_meta_error_location) { + const char* source = "\n import.meta.url"; + merve_error_loc loc{123, 456}; + merve_analysis result = + merve_parse_commonjs(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT_META); + + ASSERT_EQ(loc.line, 2u); + ASSERT_EQ(loc.column, 3u); + + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_eof_unterminated_paren_location) { + const char* source = "(a + b"; + merve_error_loc loc{123, 456}; + merve_analysis result = + merve_parse_commonjs(source, std::strlen(source), &loc); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNTERMINATED_PAREN); + + ASSERT_EQ(loc.line, 1u); + ASSERT_EQ(loc.column, 7u); + + merve_free(result); +} + +TEST(c_api_tests, parse_commonjs_accepts_null_out_err) { + const char* source = "import 'x';"; + merve_analysis result = + merve_parse_commonjs(source, std::strlen(source), NULL); + ASSERT_NE(result, nullptr); + ASSERT_FALSE(merve_is_valid(result)); + ASSERT_EQ(merve_get_last_error(), MERVE_ERROR_UNEXPECTED_ESM_IMPORT); + merve_free(result); +} + TEST(c_api_tests, esm_export_error) { const char* source = "export { x };"; merve_analysis result = merve_parse_commonjs(source, std::strlen(source)); diff --git a/tests/real_world_tests.cpp b/tests/real_world_tests.cpp index 08e54f4..12a3bf7 100644 --- a/tests/real_world_tests.cpp +++ b/tests/real_world_tests.cpp @@ -980,6 +980,70 @@ TEST(real_world_tests, esm_syntax_error_import_meta) { ASSERT_EQ(err, lexer::lexer_error::UNEXPECTED_ESM_IMPORT_META); } +TEST(real_world_tests, eof_unterminated_brace_error) { + auto result = lexer::parse_commonjs("(function test() {"); + ASSERT_FALSE(result.has_value()); + auto err = lexer::get_last_error(); + ASSERT_TRUE(err.has_value()); + ASSERT_EQ(err, lexer::lexer_error::UNTERMINATED_BRACE); +} + +TEST(real_world_tests, eof_unterminated_paren_error) { + auto result = lexer::parse_commonjs("(a + b"); + ASSERT_FALSE(result.has_value()); + auto err = lexer::get_last_error(); + ASSERT_TRUE(err.has_value()); + ASSERT_EQ(err, lexer::lexer_error::UNTERMINATED_PAREN); +} + +TEST(real_world_tests, error_location_state_resets_after_success) { + auto failed = lexer::parse_commonjs("\n import 'x';"); + ASSERT_FALSE(failed.has_value()); + + auto loc_after_error = lexer::get_last_error_location(); + ASSERT_TRUE(loc_after_error.has_value()); + ASSERT_EQ(loc_after_error->line, 2u); + ASSERT_EQ(loc_after_error->column, 3u); + + auto ok = lexer::parse_commonjs("exports.ok = 1;"); + ASSERT_TRUE(ok.has_value()); + ASSERT_FALSE(lexer::get_last_error_location().has_value()); +} + +TEST(real_world_tests, error_location_crlf_line_counting) { + auto failed = lexer::parse_commonjs("\r\n import 'x';"); + ASSERT_FALSE(failed.has_value()); + auto err = lexer::get_last_error(); + ASSERT_TRUE(err.has_value()); + ASSERT_EQ(err, lexer::lexer_error::UNEXPECTED_ESM_IMPORT); + + auto loc = lexer::get_last_error_location(); + ASSERT_TRUE(loc.has_value()); + ASSERT_EQ(loc->line, 2u); + ASSERT_EQ(loc->column, 3u); +} + +TEST(real_world_tests, error_location_import_meta_and_eof) { + auto import_meta = lexer::parse_commonjs("\n import.meta.url"); + ASSERT_FALSE(import_meta.has_value()); + ASSERT_EQ(lexer::get_last_error(), + lexer::lexer_error::UNEXPECTED_ESM_IMPORT_META); + + auto import_meta_loc = lexer::get_last_error_location(); + ASSERT_TRUE(import_meta_loc.has_value()); + ASSERT_EQ(import_meta_loc->line, 2u); + ASSERT_EQ(import_meta_loc->column, 3u); + + auto eof_unterminated = lexer::parse_commonjs("(a + b"); + ASSERT_FALSE(eof_unterminated.has_value()); + ASSERT_EQ(lexer::get_last_error(), lexer::lexer_error::UNTERMINATED_PAREN); + + auto eof_loc = lexer::get_last_error_location(); + ASSERT_TRUE(eof_loc.has_value()); + ASSERT_EQ(eof_loc->line, 1u); + ASSERT_EQ(eof_loc->column, 7u); +} + TEST(real_world_tests, unicode_escape_sequences) { // Test various unicode escape sequences in exports auto result = lexer::parse_commonjs("\