"""Validate the URLs in every supported text file under a directory tree."""
import os
from sys import exit

# File extensions (lower-case, including the dot) whose contents are scanned.
VALID_EXTENSIONS = {
    ".md",
    ".py",
    ".rst",
    ".txt",
    ".yaml",
    ".yml",
    ".json",
}

# Directories that are never descended into. Version-control metadata is not
# listed in .gitignore, so it has to be excluded explicitly.
SKIPPED_DIRECTORIES = {".git"}


def load_gitignore(path=".gitignore"):
    """Load ignore patterns from *path* using pathspec.

    Args:
        path: Location of the ignore file. Defaults to ``.gitignore`` in the
            current working directory.

    Returns:
        A ``pathspec.PathSpec`` compiled from the file's lines, or ``None``
        when the file does not exist.
    """
    try:
        with open(path, "r", encoding="utf-8") as gitignore:
            # splitlines() drops the trailing newline of every pattern.
            patterns = gitignore.read().splitlines()
    except FileNotFoundError:
        return None

    # Imported here so the third-party package is only required when there
    # actually are patterns to compile.
    import pathspec

    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)


def should_validate(file_name):
    """Return True when *file_name* has one of the supported extensions.

    The comparison is case-insensitive, so ``README.MD`` is accepted.
    """
    _, ext = os.path.splitext(file_name)
    return ext.lower() in VALID_EXTENSIONS


def validate_urls_under_directory(directory, validator=None):
    """Validate every supported, non-ignored file under *directory*.

    A per-file result and a final summary are printed. The process exits
    with status 1 when at least one file fails validation or the validator
    raises.

    Args:
        directory: Root of the tree to walk.
        validator: Callable that takes a file path and returns a truthy
            value when the file passes. Defaults to
            ``url_validator.validate_file``, imported on first use.
    """
    if validator is None:
        from url_validator import validate_file as validator

    total_count = 0
    passed_count = 0
    failed_count = 0

    # Patterns come from the .gitignore in the current working directory and
    # are matched against paths relative to that same directory.
    gitignore_spec = load_gitignore()

    for root, dirs, files in os.walk(directory):
        # Assign in place so os.walk skips the pruned directories; sorting
        # makes the report order independent of the filesystem.
        dirs[:] = sorted(d for d in dirs if d not in SKIPPED_DIRECTORIES)

        for file_name in sorted(files):
            # Skip unsupported file types (cheap check first).
            if not should_validate(file_name):
                continue

            file_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(file_path, ".")

            # Skip files ignored by .gitignore.
            if gitignore_spec is not None and gitignore_spec.match_file(
                relative_path
            ):
                continue

            total_count += 1

            print(f"\n[Validating]: {relative_path}")

            try:
                if validator(file_path):
                    print("[PASSED]")
                    passed_count += 1
                else:
                    print("[FAILED]")
                    failed_count += 1

            except Exception as error:
                # Top-level boundary: one unreadable file must not abort the
                # whole run, but it still counts as a failure.
                print(f"[ERROR]: {error}")
                failed_count += 1

    print("\n============= SUMMARY =============")
    print(f"Total checked : {total_count}")
    print(f"Passed : {passed_count}")
    print(f"Failed : {failed_count}")

    if failed_count > 0:
        exit(1)

    print("\n[SUCCESS] All URL validations passed!")


if __name__ == "__main__":
    validate_urls_under_directory(".")