# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Extract C++ code examples from markdown documentation and generate test files.

This script scans markdown files in docs/guide/cpp/, extracts ```cpp code blocks,
and generates compilable C++ test files that can be run to verify the documentation
examples are correct.
"""

import argparse
import logging
import re
import sys
from pathlib import Path
from typing import List, Tuple

logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Text of the ASF license header, one entry per line, without a comment prefix.
# An empty entry becomes a bare comment marker in the rendered header.
_LICENSE_LINES = (
    "Licensed to the Apache Software Foundation (ASF) under one",
    "or more contributor license agreements.  See the NOTICE file",
    "distributed with this work for additional information",
    "regarding copyright ownership.  The ASF licenses this file",
    "to you under the Apache License, Version 2.0 (the",
    '"License"); you may not use this file except in compliance',
    "with the License.  You may obtain a copy of the License at",
    "",
    "  http://www.apache.org/licenses/LICENSE-2.0",
    "",
    "Unless required by applicable law or agreed to in writing,",
    "software distributed under the License is distributed on an",
    '"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY',
    "KIND, either express or implied.  See the License for the",
    "specific language governing permissions and limitations",
    "under the License.",
)

# Opening ```cpp fence (tolerating trailing blanks and CRLF) up to the next fence.
_CPP_FENCE_RE = re.compile(r"```cpp[ \t]*\r?\n(.*?)```", re.DOTALL)

# Definition of main() with any parameter list, e.g. main(), main(void),
# main(int argc, char **argv).
_MAIN_RE = re.compile(r"\bint\s+main\s*\(")

# Substrings marking a top-level line as a declaration rather than a statement.
_DECLARATION_MARKERS = (
    "struct ",
    "class ",
    "enum ",
    "using ",
    "namespace ",
    "#include",
    "FORY_STRUCT",
    "FORY_ENUM",
)
_CALL_STATEMENT_RE = re.compile(r"\w+\s*\([^)]*\)\s*;")
_DECLARATION_START_RE = re.compile(
    r"^(struct|class|enum|using|namespace|#include|FORY_)"
)

_FORY_INCLUDE = '#include "fory/serialization/fory.h"'
_USING_DIRECTIVE = "using namespace fory::serialization;"

# (marker found in the snippet, header that provides it)
_HEADER_RULES = (
    ("std::string", "<string>"),
    ("std::vector", "<vector>"),
    ("std::map", "<map>"),
    ("std::set", "<set>"),
    ("std::unordered_map", "<unordered_map>"),
    ("std::unordered_set", "<unordered_set>"),
    ("std::optional", "<optional>"),
    ("std::shared_ptr", "<memory>"),
    ("std::unique_ptr", "<memory>"),
    ("std::variant", "<variant>"),
    ("std::chrono", "<chrono>"),
    ("std::make_shared", "<memory>"),
    ("std::make_unique", "<memory>"),
    ("assert(", "<cassert>"),
    ("std::cout", "<iostream>"),
    ("RowEncoder", '"fory/encoder/row_encoder.h"'),
    ("Row", '"fory/row/row.h"'),
)

# main() appended to snippets that do not define their own.
_STUB_MAIN = (
    "int main() {\n"
    '  std::cout << "Documentation example compiled successfully" << std::endl;\n'
    "  return 0;\n"
    "}"
)


def _license_header(comment_prefix: str) -> str:
    """Render the ASF license header using ``comment_prefix`` on every line."""
    return "\n".join(
        f"{comment_prefix} {line}" if line else comment_prefix
        for line in _LICENSE_LINES
    )


def _has_main(code: str) -> bool:
    """Return True when ``code`` contains a definition of ``int main(...)``."""
    return _MAIN_RE.search(code) is not None


def extract_cpp_code_blocks(content: str) -> List[Tuple[str, int]]:
    """Extract C++ code blocks from markdown content.

    Args:
        content: Full text of a markdown document.

    Returns:
        A list of ``(code, line_number)`` tuples in document order, where
        ``code`` is the stripped body of a fenced ``cpp`` block and
        ``line_number`` is the 1-based line of its opening fence.
    """
    return [
        (match.group(1).strip(), content.count("\n", 0, match.start()) + 1)
        for match in _CPP_FENCE_RE.finditer(content)
    ]


def is_complete_example(code: str) -> bool:
    """Check whether a code block is a complete, runnable example.

    A complete example defines ``int main(...)``. In addition, a line seen
    before ``main`` at brace depth zero that contains a call statement
    (``name(...);``), is not recognised as a declaration, and does not itself
    end in ``;`` marks the block as incomplete.

    Args:
        code: The C++ snippet to inspect.

    Returns:
        True if the snippet looks like a self-contained program.
    """
    brace_depth = 0
    in_main = False

    for line in code.split("\n"):
        stripped = line.strip()
        if not stripped or stripped.startswith("//"):
            continue

        # Braces are counted before the checks below, so a line that opens a
        # body is already treated as "inside" it.
        brace_depth += stripped.count("{") - stripped.count("}")

        if _MAIN_RE.search(stripped):
            in_main = True
            continue

        if brace_depth != 0 or in_main:
            continue
        if any(marker in stripped for marker in _DECLARATION_MARKERS):
            continue
        # Lines ending in ';' at top level are treated as declarations.
        if stripped.endswith(";"):
            continue
        if _CALL_STATEMENT_RE.search(
            stripped
        ) and not _DECLARATION_START_RE.match(stripped):
            return False

    return _has_main(code)


def wrap_code_as_test(code: str, doc_file: str, block_index: int) -> str:
    """Wrap a code snippet as a complete, compilable test file.

    The Fory serialization header is always included ahead of the snippet so
    that the ``using namespace fory::serialization;`` directive is valid even
    when the snippet carries its own ``#include`` lines. Standard and Fory
    headers are added for the library names the snippet mentions, unless the
    snippet already includes them. Snippets without ``main`` get a stub
    ``main`` that prints a success message.

    Args:
        code: The C++ snippet.
        doc_file: Name of the markdown file the snippet came from.
        block_index: Index of the snippet within that file.

    Returns:
        The full text of the generated ``.cc`` file.
    """
    has_main = _has_main(code)

    includes = {_FORY_INCLUDE}
    for marker, header in _HEADER_RULES:
        directive = f"#include {header}"
        if marker in code and directive not in code:
            includes.add(directive)
    if not has_main:
        # The stub main() writes to std::cout.
        includes.add("#include <iostream>")
    include_section = "\n".join(sorted(includes))

    if has_main:
        title = f"// Auto-generated test from {doc_file}"
        # Only add the namespace directive if the snippet has none of its own.
        if "using namespace" in code:
            body = code
        else:
            body = f"{_USING_DIRECTIVE}\n\n{code}"
    else:
        title = f"// Auto-generated test from {doc_file}, block {block_index}"
        body = f"{_USING_DIRECTIVE}\n\n{code}\n\n{_STUB_MAIN}"

    return f"{title}\n{_license_header('//')}\n\n{include_section}\n\n{body}\n"


def generate_test_file_name(doc_file: str, block_index: int) -> str:
    """Generate a test file name from a documentation file and block index.

    Args:
        doc_file: Markdown file name or path; only its stem is used.
        block_index: Index of the code block within the file.

    Returns:
        A name of the form ``doc_test_<stem>_<index>.cc``.
    """
    return f"doc_test_{Path(doc_file).stem}_{block_index}.cc"


def process_markdown_file(md_path: Path, output_dir: Path) -> List[Path]:
    """Generate test files for every complete C++ example in one markdown file.

    Args:
        md_path: Markdown file to scan.
        output_dir: Existing directory that receives the generated files.

    Returns:
        Paths of the files written, in document order.
    """
    logging.info(f"Processing {md_path}")

    content = md_path.read_text(encoding="utf-8")
    code_blocks = extract_cpp_code_blocks(content)
    logging.info(f"  Found {len(code_blocks)} C++ code blocks")

    generated_files = []
    # The index counts every block, including skipped ones, so file names stay
    # stable when an earlier block becomes complete or incomplete.
    for index, (code, line_num) in enumerate(code_blocks):
        if not is_complete_example(code):
            logging.debug(f"  Skipping incomplete example at line {line_num}")
            continue

        test_file_name = generate_test_file_name(md_path.name, index)
        test_path = output_dir / test_file_name
        test_path.write_text(
            wrap_code_as_test(code, md_path.name, index), encoding="utf-8"
        )

        generated_files.append(test_path)
        logging.info(f"  Generated {test_file_name}")

    return generated_files


def _cc_test_rule(test_file: Path) -> str:
    """Render the ``cc_test`` rule for one generated test file."""
    deps = ['"//cpp/fory/serialization:fory_serialization"']
    # Examples from the row-format guide also need the row/encoder libraries.
    if "row-format" in test_file.stem:
        deps.append('"//cpp/fory/row:fory_row_format"')
        deps.append('"//cpp/fory/encoder:fory_encoder"')
    dep_lines = "\n".join(f"        {dep}," for dep in deps)
    return (
        "\n"
        "cc_test(\n"
        f'    name = "{test_file.stem}",\n'
        f'    srcs = ["{test_file.name}"],\n'
        "    deps = [\n"
        f"{dep_lines}\n"
        "    ],\n"
        ")\n"
    )


def generate_bazel_build(test_files: List[Path], output_dir: Path) -> None:
    """Write a Bazel ``BUILD`` file declaring the generated tests.

    One ``cc_test`` is emitted per file (sorted by path), followed by a
    ``doc_example_tests`` test suite when at least one test exists.

    Args:
        test_files: Generated test sources.
        output_dir: Directory that receives the ``BUILD`` file.
    """
    ordered = sorted(test_files)

    sections = [
        f"{_license_header('#')}\n"
        "\n"
        "# Auto-generated BUILD file for documentation example tests\n"
        "# Run: bazel test //cpp/doc_tests:doc_example_tests\n"
        "\n"
        'package(default_visibility = ["//visibility:public"])\n'
        "\n"
    ]
    sections.extend(_cc_test_rule(test_file) for test_file in ordered)

    if ordered:
        suite_entries = "\n".join(
            f'        ":{test_file.stem}",' for test_file in ordered
        )
        sections.append(
            "\n"
            "test_suite(\n"
            '    name = "doc_example_tests",\n'
            "    tests = [\n"
            f"{suite_entries}\n"
            "    ],\n"
            ")\n"
        )

    (output_dir / "BUILD").write_text("".join(sections), encoding="utf-8")

    logging.info(f"Generated BUILD file with {len(ordered)} tests")


def main():
    """Command-line entry point: scan the docs directory and emit test files.

    Paths given on the command line are resolved relative to the project root
    (the parent of the directory containing this script). Exits with status 1
    if the documentation directory does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Extract C++ code examples from markdown documentation"
    )
    parser.add_argument(
        "--docs-dir",
        default="docs/guide/cpp",
        help="Directory containing markdown documentation files",
    )
    parser.add_argument(
        "--output-dir",
        default="cpp/doc_tests",
        help="Output directory for generated test files",
    )
    parser.add_argument(
        "--generate-build",
        action="store_true",
        help="Generate Bazel BUILD file",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose output",
    )

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    project_root = Path(__file__).parent.parent
    docs_dir = project_root / args.docs_dir
    output_dir = project_root / args.output_dir

    if not docs_dir.exists():
        logging.error(f"Documentation directory not found: {docs_dir}")
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    all_test_files = []
    for md_file in sorted(docs_dir.glob("*.md")):
        all_test_files.extend(process_markdown_file(md_file, output_dir))

    logging.info(f"\nTotal: Generated {len(all_test_files)} test files")

    if args.generate_build and all_test_files:
        generate_bazel_build(all_test_files, output_dir)

    print(f"\nGenerated files in {output_dir}:")
    for test_file in sorted(all_test_files):
        print(f"  {test_file.name}")


if __name__ == "__main__":
    main()
def generate_doc_example_tests():
    """Generate C++ test files from documentation examples.

    Runs ``ci/extract_cpp_doc_code.py`` from the project root with
    ``--generate-build`` so that both the test sources and their Bazel BUILD
    file are written to ``cpp/doc_tests``.

    Raises:
        RuntimeError: If the extraction script exits with a non-zero status.
    """
    # Local import: the interpreter path is only needed here.
    import sys

    logging.info("Generating documentation example tests")

    script_path = os.path.join(
        common.PROJECT_ROOT_DIR, "ci", "extract_cpp_doc_code.py"
    )
    output_dir = "cpp/doc_tests"
    result = subprocess.run(
        [
            # Use the interpreter running CI; a bare "python" may be missing
            # from PATH or point at a different version.
            sys.executable or "python",
            script_path,
            "--docs-dir",
            "docs/guide/cpp",
            "--output-dir",
            output_dir,
            "--generate-build",
        ],
        cwd=common.PROJECT_ROOT_DIR,
        capture_output=True,
        text=True,
    )

    if result.returncode != 0:
        logging.error(f"Failed to generate doc example tests: {result.stderr}")
        raise RuntimeError("Failed to generate doc example tests")

    logging.info(f"Documentation example tests generated in {output_dir}")


def run_doc_example_tests():
    """Generate test files from documentation and run them with Bazel.

    Adds ``--config=x86_64`` to the Bazel invocation when
    ``common.get_os_machine()`` reports ``x86_64``.
    """
    generate_doc_example_tests()

    logging.info("Running documentation example tests")
    target = "//cpp/doc_tests:doc_example_tests"
    if common.get_os_machine() == "x86_64":
        test_command = f"test --config=x86_64 {target}"
    else:
        test_command = f"test {target}"
    common.bazel(test_command)
""" logging.info("Running C++ CI tasks") common.install_cpp_deps() @@ -32,6 +74,11 @@ def run(install_deps_only=False): logging.info("Skipping tests as --install-deps-only was specified") return + if doc_tests_only: + # logging.info("Running only documentation example tests") + run_doc_example_tests() + return + # collect all C++ targets query_result = common.bazel("query //...") targets = query_result.replace("\n", " ").replace("\r", " ") @@ -42,3 +89,10 @@ def run(install_deps_only=False): common.bazel(f"{test_command} {targets}") logging.info("C++ CI tasks completed successfully") + + # Run documentation example tests + if not skip_doc_tests: + try: + run_doc_example_tests() + except Exception as e: + logging.warning(f"Documentation example tests failed: {e}") diff --git a/docs/guide/cpp/field-configuration.md b/docs/guide/cpp/field-configuration.md index 899a52f6f8..b007b7b79a 100644 --- a/docs/guide/cpp/field-configuration.md +++ b/docs/guide/cpp/field-configuration.md @@ -225,7 +225,7 @@ int main() { doc.description = "A sample document"; doc.metadata = nullptr; // Allowed because nullable doc.parent = std::make_shared(); - doc.parent->title = "Parent Doc"; + doc.parent.get()->title = "Parent Doc"; doc.related = nullptr; // Allowed because nullable auto bytes = fory.serialize(doc).value(); diff --git a/docs/guide/cpp/index.md b/docs/guide/cpp/index.md index 2b363261f8..1ab412e2da 100644 --- a/docs/guide/cpp/index.md +++ b/docs/guide/cpp/index.md @@ -137,11 +137,13 @@ See the [examples/cpp](https://github.com/apache/fory/tree/main/examples/cpp) di ```cpp #include "fory/serialization/fory.h" +#include +#include using namespace fory::serialization; // Define a struct -class Person { +struct Person { std::string name; int32_t age; std::vector hobbies; @@ -149,13 +151,8 @@ class Person { bool operator==(const Person &other) const { return name == other.name && age == other.age && hobbies == other.hobbies; } - -public: - // Register the struct with Fory 
(FORY_STRUCT must be in public scope). - FORY_STRUCT(Person, name, age, hobbies); }; - - +FORY_STRUCT(Person, name, age, hobbies); int main() { // Create a Fory instance