diff --git a/.github/workflows/run_preflight.yml b/.github/workflows/run_preflight.yml index 412e112..4a90aaf 100644 --- a/.github/workflows/run_preflight.yml +++ b/.github/workflows/run_preflight.yml @@ -37,3 +37,6 @@ jobs: - name: Clippy check working-directory: rust run: cargo clippy --all-targets --all-features -- -D warnings + + - name: Unused dependency check + uses: bnjbvr/cargo-machete@main diff --git a/.github/workflows/run_python_unit_tests.yml b/.github/workflows/run_python_unit_tests.yml index e9afc40..0723b13 100644 --- a/.github/workflows/run_python_unit_tests.yml +++ b/.github/workflows/run_python_unit_tests.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ] + python-version: ['3.11', '3.12', '3.13', '3.14'] steps: - uses: actions/checkout@v3 diff --git a/Cargo.toml b/Cargo.toml index 4aa8e70..001ffca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,8 @@ [workspace] +resolver = "3" members = [ "rust/codelist-rs", + "rust/codelist-systems-rs", "rust/codelist-validator-rs", "rust/codelist-builder-rs", "bindings/python", diff --git a/Justfile b/Justfile index 7683ffd..aba24d6 100644 --- a/Justfile +++ b/Justfile @@ -1,15 +1,27 @@ +# Default recipe: list all available recipes when you run `just` with no args +default: + @just --list + +# List all available recipes +list: + @just --list + # Ensure Rust code is formatted fmt: cargo fmt # Check if Rust code is formatted (used in CI) fmt-check: - cargo fmt -- --check + cargo fmt --all -- --check # Run Clippy with warnings treated as errors clippy: cargo clippy -- -D warnings +# Scan Cargo.toml files for unused dependencies +machete: + cargo machete + # Run Prettier on Markdown files prettier: npx prettier --write "**/*.md" @@ -18,17 +30,18 @@ prettier: prettier-check: npx prettier --check "**/*.md" -# CI task: check formatting and linting and all tests -ci: fmt-check clippy prettier-check test-python test-rust +# CI task: check 
formatting, linting, unused deps, and all tests +ci: fmt-check clippy machete prettier-check test-python test-rust -# Run python tests. # Requires an active Python venv (run `source bindings/python/.venv/bin/activate` # first). `maturin develop` builds the extension and installs it into the # active venv so the tests exercise the current Rust code, not a stale wheel. +# +# Run python tests test-python: maturin develop --manifest-path bindings/python/Cargo.toml sh bindings/python/tests/run.sh # Run rust tests test-rust: - cargo test --all-features \ No newline at end of file + cargo test --all-features diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index a4ff161..a9ebb77 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "codelists_rs" version = "0.1.0" -edition = "2021" +edition = "2024" [lib] name = "codelists_rs" diff --git a/bindings/python/src/codelist.rs b/bindings/python/src/codelist.rs index f325574..f309189 100755 --- a/bindings/python/src/codelist.rs +++ b/bindings/python/src/codelist.rs @@ -15,10 +15,10 @@ use codelist_rs::{ use codelist_validator_rs::validator::Validator; use indexmap::IndexSet; use pyo3::{ + PyErr, PyResult, exceptions::PyValueError, prelude::*, types::{PyDict, PySet}, - PyErr, PyResult, }; use regex::Regex; @@ -53,7 +53,7 @@ impl PyCodeList { _ => { return Err(PyErr::new::(format!( "Invalid codelist type: {codelist_type}" - ))) + ))); } }; diff --git a/bindings/python/src/factory.rs b/bindings/python/src/factory.rs index 91191a3..eca47e9 100644 --- a/bindings/python/src/factory.rs +++ b/bindings/python/src/factory.rs @@ -29,7 +29,7 @@ impl PyCodeListFactory { _ => { return Err(PyValueError::new_err(format!( "Invalid codelist type: {codelist_type}" - ))) + ))); } }; diff --git a/bindings/r/src/rust/Cargo.toml b/bindings/r/src/rust/Cargo.toml index ac5b790..305adc9 100644 --- a/bindings/r/src/rust/Cargo.toml +++ b/bindings/r/src/rust/Cargo.toml @@ -2,8 +2,8 @@ 
name = 'codelist' publish = false version = '0.1.0' -edition = '2021' -rust-version = '1.65' +edition = '2024' +rust-version = '1.85' [lib] crate-type = [ 'staticlib' ] diff --git a/rust/codelist-builder-rs/Cargo.toml b/rust/codelist-builder-rs/Cargo.toml index c8636f9..0b9e908 100644 --- a/rust/codelist-builder-rs/Cargo.toml +++ b/rust/codelist-builder-rs/Cargo.toml @@ -1,19 +1,17 @@ [package] name = "codelist-builder-rs" version = "0.1.0" -edition = "2021" +edition = "2024" authors = ["Caroline Morton "] description = "Builder library for medical codelists" [dependencies] -codelist-rs = { path = "../codelist-rs" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" thiserror = { version = "2.0.9" } thiserror-ext = { version = "0.2.1" } reqwest = { version = "0.12.2" } csv = { version = "1.3.1" } -async-trait = "0.1" [dev-dependencies] tokio = { version = "1.0", features = ["full"] } diff --git a/rust/codelist-rs/Cargo.toml b/rust/codelist-rs/Cargo.toml index 25b3b51..e74557a 100644 --- a/rust/codelist-rs/Cargo.toml +++ b/rust/codelist-rs/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "codelist-rs" version = "0.1.0" -edition = "2021" +edition = "2024" authors = ["Caroline Morton ", "Emma Bagshaw"] description = "Base library for medical code list handling" diff --git a/rust/codelist-rs/src/codelist.rs b/rust/codelist-rs/src/codelist.rs index ab32311..6f98c86 100644 --- a/rust/codelist-rs/src/codelist.rs +++ b/rust/codelist-rs/src/codelist.rs @@ -100,11 +100,7 @@ impl CodeList { /// found pub fn remove_entry(&mut self, code: &str) -> Result<(), CodeListError> { let removed = self.entries.remove(code); - if removed.is_some() { - Ok(()) - } else { - Err(CodeListError::entry_not_found(code)) - } + if removed.is_some() { Ok(()) } else { Err(CodeListError::entry_not_found(code)) } } /// Get the full entries of the codelist, including code, optional term and diff --git a/rust/codelist-rs/src/codelist_factory.rs b/rust/codelist-rs/src/codelist_factory.rs 
index 0d3b978..ace6882 100644 --- a/rust/codelist-rs/src/codelist_factory.rs +++ b/rust/codelist-rs/src/codelist_factory.rs @@ -290,17 +290,16 @@ impl CodeListFactory { let path = entry.path(); // Skips if not csv/json - if let Some(ext) = path.extension().and_then(|e| e.to_str()) { - if ext == "csv" || ext == "json" { - if let Some(path_str) = path.to_str() { - // TODO: We are using the file name as the codelist name, but this may not - // be the best approach - if let Ok(codelist) = - self.load_codelist_from_file(folder_path.to_string(), path_str) - { - codelists.push(codelist); - } - } + if let Some(ext) = path.extension().and_then(|e| e.to_str()) + && (ext == "csv" || ext == "json") + && let Some(path_str) = path.to_str() + { + // TODO: We are using the file name as the codelist name, but this may not + // be the best approach + if let Ok(codelist) = + self.load_codelist_from_file(folder_path.to_string(), path_str) + { + codelists.push(codelist); } } } @@ -482,18 +481,24 @@ C03,Test Disease 3,Description 3"; assert_eq!(codelist.entries.len(), 3); // Test individual entries exist - assert!(codelist - .entries - .iter() - .any(|e| e.0 == "A01" && e.1 .0 == Some("Test Disease 1".to_string()))); - assert!(codelist - .entries - .iter() - .any(|e| e.0 == "B02" && e.1 .0 == Some("Test Disease 2".to_string()))); - assert!(codelist - .entries - .iter() - .any(|e| e.0 == "C03" && e.1 .0 == Some("Test Disease 3".to_string()))); + assert!( + codelist + .entries + .iter() + .any(|e| e.0 == "A01" && e.1.0 == Some("Test Disease 1".to_string())) + ); + assert!( + codelist + .entries + .iter() + .any(|e| e.0 == "B02" && e.1.0 == Some("Test Disease 2".to_string())) + ); + assert!( + codelist + .entries + .iter() + .any(|e| e.0 == "C03" && e.1.0 == Some("Test Disease 3".to_string())) + ); assert!(!codelist.codelist_options.allow_duplicates); assert_eq!(codelist.codelist_options.code_column_name, "code".to_string()); @@ -690,18 +695,24 @@ A01"; // Missing columns 
assert_eq!(codelist.entries.len(), 3); // Test individual entries exist - assert!(codelist - .entries - .iter() - .any(|e| e.0 == "A01" && e.1 .0 == Some("Test Disease 1".to_string()))); - assert!(codelist - .entries - .iter() - .any(|e| e.0 == "B02" && e.1 .0 == Some("Test Disease 2".to_string()))); - assert!(codelist - .entries - .iter() - .any(|e| e.0 == "C03" && e.1 .0 == Some("Test Disease 3".to_string()))); + assert!( + codelist + .entries + .iter() + .any(|e| e.0 == "A01" && e.1.0 == Some("Test Disease 1".to_string())) + ); + assert!( + codelist + .entries + .iter() + .any(|e| e.0 == "B02" && e.1.0 == Some("Test Disease 2".to_string())) + ); + assert!( + codelist + .entries + .iter() + .any(|e| e.0 == "C03" && e.1.0 == Some("Test Disease 3".to_string())) + ); assert!(!codelist.codelist_options.allow_duplicates); assert_eq!(codelist.codelist_options.code_column_name, "code".to_string()); diff --git a/rust/codelist-rs/src/metadata/categorisation_and_usage.rs b/rust/codelist-rs/src/metadata/categorisation_and_usage.rs index 84d55d5..3820ffa 100644 --- a/rust/codelist-rs/src/metadata/categorisation_and_usage.rs +++ b/rust/codelist-rs/src/metadata/categorisation_and_usage.rs @@ -259,7 +259,10 @@ mod tests { let mut categorisation_and_usage = test_categorisation_and_usage_all_some(); let error = categorisation_and_usage.add_license("license1".to_string()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "License already exists: Unable to add license license1. Please use update license instead."); + assert_eq!( + error_string, + "License already exists: Unable to add license license1. Please use update license instead." 
+ ); Ok(()) } @@ -277,7 +280,10 @@ mod tests { let mut categorisation_and_usage = test_categorisation_and_usage_all_none(); let error = categorisation_and_usage.update_license("example".to_string()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "License does not exist: Unable to update license example. Please use add license instead."); + assert_eq!( + error_string, + "License does not exist: Unable to update license example. Please use add license instead." + ); Ok(()) } diff --git a/rust/codelist-rs/src/metadata/purpose_and_context.rs b/rust/codelist-rs/src/metadata/purpose_and_context.rs index 3d3be13..0b04d1f 100644 --- a/rust/codelist-rs/src/metadata/purpose_and_context.rs +++ b/rust/codelist-rs/src/metadata/purpose_and_context.rs @@ -285,7 +285,10 @@ mod tests { let error = purpose_and_context.add_target_audience("Target Audience".to_string()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Target audience already exists: Unable to add target audience. Please use update target audience instead."); + assert_eq!( + error_string, + "Target audience already exists: Unable to add target audience. Please use update target audience instead." + ); Ok(()) } @@ -303,7 +306,10 @@ mod tests { let error = purpose_and_context.update_target_audience("Target Audience".to_string()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Target audience does not exist: Unable to update target audience. Please use add target audience instead."); + assert_eq!( + error_string, + "Target audience does not exist: Unable to update target audience. Please use add target audience instead." 
+ ); Ok(()) } @@ -340,7 +346,10 @@ mod tests { let mut purpose_and_context = create_test_purpose_and_context_all_params_are_some(); let error = purpose_and_context.add_use_context("Use Context".to_string()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Use context already exists: Unable to add use context. Please use update use context instead."); + assert_eq!( + error_string, + "Use context already exists: Unable to add use context. Please use update use context instead." + ); Ok(()) } @@ -357,7 +366,10 @@ mod tests { let mut purpose_and_context = create_test_purpose_and_context_all_params_are_none(); let error = purpose_and_context.update_use_context("Use Context".to_string()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Use context does not exist: Unable to update use context. Please use add use context instead."); + assert_eq!( + error_string, + "Use context does not exist: Unable to update use context. Please use add use context instead." + ); Ok(()) } diff --git a/rust/codelist-rs/src/metadata/validation_and_review.rs b/rust/codelist-rs/src/metadata/validation_and_review.rs index b917bd3..e4bdbd2 100644 --- a/rust/codelist-rs/src/metadata/validation_and_review.rs +++ b/rust/codelist-rs/src/metadata/validation_and_review.rs @@ -395,7 +395,10 @@ mod tests { let mut validation_and_review = test_validation_and_review_all_params_are_some_or_true(); let error = validation_and_review.add_review_date(chrono::Utc::now()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Review date already exists: Unable to add review date. Please use update review date instead."); + assert_eq!( + error_string, + "Review date already exists: Unable to add review date. Please use update review date instead." 
+ ); Ok(()) } @@ -414,7 +417,10 @@ mod tests { let mut validation_and_review = test_validation_and_review_all_params_are_none(); let error = validation_and_review.update_review_date(chrono::Utc::now()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Review date does not exist: Unable to update review date. Please use add review date instead."); + assert_eq!( + error_string, + "Review date does not exist: Unable to update review date. Please use add review date instead." + ); Ok(()) } @@ -513,7 +519,10 @@ mod tests { let error = validation_and_review.add_validation_notes("Validation Notes".to_string()).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Validation notes already exist: Unable to add validation notes. Please use update validation notes instead."); + assert_eq!( + error_string, + "Validation notes already exist: Unable to add validation notes. Please use update validation notes instead." + ); Ok(()) } @@ -536,7 +545,10 @@ mod tests { .update_validation_notes("Validation Notes".to_string()) .unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Validation notes do not exist: Unable to update validation notes. Please use add validation notes instead."); + assert_eq!( + error_string, + "Validation notes do not exist: Unable to update validation notes. Please use add validation notes instead." + ); Ok(()) } diff --git a/rust/codelist-systems-rs/Cargo.toml b/rust/codelist-systems-rs/Cargo.toml new file mode 100644 index 0000000..6096ea7 --- /dev/null +++ b/rust/codelist-systems-rs/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "codelist-systems-rs" +version = "0.1.0" +edition = "2024" +authors = ["Caroline Morton "] +description = "Coding systems (ICD10, SNOMED, OPCS, CTV3) as data with pluggable capabilities." 
+ +[dependencies] +codelist-rs = { path = "../codelist-rs" } +regex = "1.11.1" +thiserror = "2.0.9" +thiserror-ext = "0.3.0" + +[dev-dependencies] +proptest = "1" diff --git a/rust/codelist-systems-rs/src/capabilities.rs b/rust/codelist-systems-rs/src/capabilities.rs new file mode 100644 index 0000000..0418998 --- /dev/null +++ b/rust/codelist-systems-rs/src/capabilities.rs @@ -0,0 +1,38 @@ +//! Optional capabilities that some coding systems provide. +//! +//! Each trait is a subtrait of [`CodingSystem`] so capability methods +//! have access to the system's `ID` and validation rules. A system opts +//! into a capability by implementing the corresponding trait; absence +//! of an impl means the capability doesn't apply to that system (e.g. +//! SNOMED codes are not `X`-extensible). + +use codelist_rs::types::NormalizedCode; + +use crate::core::CodingSystem; + +/// A coding system whose codes can be truncated to a shorter, still-valid +/// code. The target depth is baked into the impl (ICD10 truncates to 3 +/// characters); callers never pass one. +pub trait Truncatable: CodingSystem { + /// Whether `code` is longer than the system's canonical truncation + /// depth and therefore a candidate for [`truncate`](Self::truncate). + fn is_truncatable(code: &NormalizedCode) -> bool; + + /// Produce the truncated form. Callers should first check + /// [`is_truncatable`](Self::is_truncatable); calling `truncate` on a + /// code that isn't truncatable returns the input unchanged. + fn truncate(code: &NormalizedCode) -> NormalizedCode; +} + +/// A coding system where appending `X` to a code is a valid transformation +/// (currently ICD10 only). +pub trait XExtensible: CodingSystem { + /// Returns `true` if appending `X` to `code` is a valid system + /// transformation. For ICD10, this is true when `code` has exactly + /// 3 characters. + fn is_x_addable(code: &NormalizedCode) -> bool; + + /// Returns the code with `X` appended. 
Callers should first check + /// [`is_x_addable`](Self::is_x_addable). + fn add_x(code: &NormalizedCode) -> NormalizedCode; +} diff --git a/rust/codelist-systems-rs/src/core.rs b/rust/codelist-systems-rs/src/core.rs new file mode 100644 index 0000000..74da7e9 --- /dev/null +++ b/rust/codelist-systems-rs/src/core.rs @@ -0,0 +1,22 @@ +//! The core trait every coding system implements. + +use codelist_rs::types::{Code, CodeSystemId, NormalizedCode}; + +use crate::errors::{SystemError, ValidationError}; + +/// A coding system. +/// +/// Implementors are typically zero-sized marker types (e.g. `Icd10`, +/// `Snomed`). The trait is object-unsafe on purpose — dispatch happens +/// by pattern-matching on the enum tag stored inside `CodeList`. +pub trait CodingSystem { + /// Stable identifier for the system. + const ID: CodeSystemId; + + /// Produce a canonical form of the code (e.g. upper-case, strip + /// surrounding whitespace). + fn normalize(code: &Code) -> Result<NormalizedCode, SystemError>; + + /// Check that a normalised code satisfies the system's syntax rules. + fn validate_syntax(code: &NormalizedCode) -> Result<(), ValidationError>; +} diff --git a/rust/codelist-systems-rs/src/errors.rs b/rust/codelist-systems-rs/src/errors.rs new file mode 100644 index 0000000..ca71800 --- /dev/null +++ b/rust/codelist-systems-rs/src/errors.rs @@ -0,0 +1,19 @@ +//! Errors produced by the CodingSystem trait and its implementors. 
+ +use thiserror::Error; +use thiserror_ext::Construct; + +#[derive(Debug, Error, Construct, Clone)] +pub enum SystemError { + #[error("{system}: {msg}")] + Normalisation { system: String, msg: String }, +} + +#[derive(Debug, Error, Construct, Clone)] +pub enum ValidationError { + #[error("Code {code} has invalid length for {system}: {reason}")] + InvalidLength { code: String, system: String, reason: String }, + + #[error("Code {code} has invalid contents for {system}: {reason}")] + InvalidContents { code: String, system: String, reason: String }, +} diff --git a/rust/codelist-systems-rs/src/icd10.rs b/rust/codelist-systems-rs/src/icd10.rs new file mode 100644 index 0000000..3c404c1 --- /dev/null +++ b/rust/codelist-systems-rs/src/icd10.rs @@ -0,0 +1,80 @@ +//! ICD10. +//! +//! Rules (preserved from the legacy `icd10_validator`): +//! 1. Length ≤ 7. +//! 2. First character is a letter (upper case after normalisation). +//! 3. Second and third characters are digits. +//! 4. Fourth character is `.`, a digit, or `X`. +//! 5. If fourth is `.`, at least one digit follows. +//! 6. If fourth is `X`, no further characters. +//! 7. Fifth–seventh characters are digits if present. + +use std::sync::LazyLock; + +use codelist_rs::types::{Code, CodeSystemId, NormalizedCode}; +use regex::Regex; + +use crate::{ + capabilities::{Truncatable, XExtensible}, + core::CodingSystem, + errors::{SystemError, ValidationError}, +}; + +/// ICD10 coding system marker. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Icd10; + +static ICD10_REGEX: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(r"^[A-Z]\d{2}(X|(\.\d{1,3})?|\d{1,4})?$").expect("ICD10 regex compiles") +}); + +impl CodingSystem for Icd10 { + const ID: CodeSystemId = CodeSystemId("ICD10"); + + fn normalize(code: &Code) -> Result<NormalizedCode, SystemError> { + let s = code.as_str().trim().to_ascii_uppercase(); + if s.is_empty() { + return Err(SystemError::normalisation("ICD10", "empty after trim")); + } + Ok(NormalizedCode::from(s)) + } + + fn validate_syntax(code: &NormalizedCode) -> Result<(), ValidationError> { + if code.as_str().len() > 7 { + return Err(ValidationError::invalid_length( + code.as_str().to_string(), + "ICD10".to_string(), + "length > 7".to_string(), + )); + } + if !ICD10_REGEX.is_match(code.as_str()) { + return Err(ValidationError::invalid_contents( + code.as_str().to_string(), + "ICD10".to_string(), + "does not match expected format".to_string(), + )); + } + Ok(()) + } +} + +impl Truncatable for Icd10 { + fn is_truncatable(code: &NormalizedCode) -> bool { + code.as_str().len() > 3 + } + + fn truncate(code: &NormalizedCode) -> NormalizedCode { + let s: String = code.as_str().chars().take(3).collect(); + NormalizedCode::from(s) + } +} + +impl XExtensible for Icd10 { + fn is_x_addable(code: &NormalizedCode) -> bool { + code.as_str().len() == 3 + } + + fn add_x(code: &NormalizedCode) -> NormalizedCode { + NormalizedCode::from(format!("{}X", code.as_str())) + } +} diff --git a/rust/codelist-systems-rs/src/lib.rs b/rust/codelist-systems-rs/src/lib.rs new file mode 100644 index 0000000..1a04eb6 --- /dev/null +++ b/rust/codelist-systems-rs/src/lib.rs @@ -0,0 +1,16 @@ +//! Coding systems (ICD10, SNOMED, OPCS, CTV3) as data. +//! +//! Each system is a zero-sized marker type implementing +//! [`core::CodingSystem`]. Optional capabilities (truncation, hierarchy, +//! mapping) are expressed as subtraits in [`capabilities`]. 
+ +pub mod capabilities; +pub mod core; +pub mod errors; +pub mod icd10; + +pub use crate::{ + capabilities::{Truncatable, XExtensible}, + core::CodingSystem, + errors::{SystemError, ValidationError}, +}; diff --git a/rust/codelist-systems-rs/tests/icd10_rules.rs b/rust/codelist-systems-rs/tests/icd10_rules.rs new file mode 100644 index 0000000..e87e639 --- /dev/null +++ b/rust/codelist-systems-rs/tests/icd10_rules.rs @@ -0,0 +1,90 @@ +use codelist_rs::types::Code; +use codelist_systems_rs::{CodingSystem, icd10::Icd10}; + +#[test] +fn valid_icd10_codes_pass_syntax() { + for ok in ["A54", "A37", "A05", "B74.0", "N40", "M10", "Q90", "K02"] { + let c = Code::from(ok); + let n = Icd10::normalize(&c).unwrap(); + Icd10::validate_syntax(&n).unwrap_or_else(|_| panic!("{ok} should pass")); + } +} + +#[test] +fn invalid_icd10_codes_fail_syntax() { + // Previously-invalid codes from the existing ICD10 validator test-suite. + // Note: "a54" was invalid in the old validator (rejected lowercase), but is + // now valid because `normalize` uppercases before `validate_syntax` runs. + // That's the intended design change. Hence it's not in this list. + for bad in ["A009000000", "1009", "AA09", "A0A9", "A00A", "A00.A", "A00X12", "A00.4AA"] { + let c = Code::from(bad); + let n = Icd10::normalize(&c).unwrap(); + assert!(Icd10::validate_syntax(&n).is_err(), "{bad} should fail"); + } +} + +#[test] +fn too_long_icd10_codes_report_length_error() { + let c = Code::from("A009000000"); + let n = Icd10::normalize(&c).unwrap(); + let err = Icd10::validate_syntax(&n).unwrap_err(); + assert!(matches!(err, codelist_systems_rs::ValidationError::InvalidLength { .. 
})); +} + +#[test] +fn icd10_normalize_upper_cases_and_trims() { + let c = Code::from(" a54 "); + let n = Icd10::normalize(&c).unwrap(); + assert_eq!(n.as_str(), "A54"); + Icd10::validate_syntax(&n).unwrap(); +} + +#[test] +fn icd10_normalize_rejects_empty_code() { + let c = Code::from(" "); + assert!(Icd10::normalize(&c).is_err()); +} + +use proptest::prelude::*; + +proptest! { + #[test] + fn arbitrary_strings_match_regex_iff_validate_ok(s in "[A-Za-z0-9. X]{0,10}") { + let c = Code::from(s.as_str()); + let Ok(n) = Icd10::normalize(&c) else { return Ok(()); }; + let regex_ok = regex::Regex::new(r"^[A-Z]\d{2}(X|(\.\d{1,3})?|\d{1,4})?$") + .unwrap() + .is_match(n.as_str()) && n.as_str().len() <= 7; + let validate_ok = Icd10::validate_syntax(&n).is_ok(); + prop_assert_eq!(regex_ok, validate_ok); + } +} + +use codelist_systems_rs::{Truncatable, XExtensible}; + +#[test] +fn icd10_is_truncatable_when_longer_than_three() { + let n = Icd10::normalize(&Code::from("A00.4")).unwrap(); + assert!(Icd10::is_truncatable(&n)); + let short = Icd10::normalize(&Code::from("A00")).unwrap(); + assert!(!Icd10::is_truncatable(&short)); +} + +#[test] +fn icd10_truncate_to_three_chars() { + let n = Icd10::normalize(&Code::from("A00.4")).unwrap(); + let t = Icd10::truncate(&n); + assert_eq!(t.as_str(), "A00"); +} + +#[test] +fn icd10_is_x_addable_for_three_char_codes() { + let n = Icd10::normalize(&Code::from("A00")).unwrap(); + assert!(Icd10::is_x_addable(&n)); +} + +#[test] +fn icd10_add_x_appends_x() { + let n = Icd10::normalize(&Code::from("A00")).unwrap(); + assert_eq!(Icd10::add_x(&n).as_str(), "A00X"); +} diff --git a/rust/codelist-validator-rs/Cargo.toml b/rust/codelist-validator-rs/Cargo.toml index f612c2b..1c04fff 100644 --- a/rust/codelist-validator-rs/Cargo.toml +++ b/rust/codelist-validator-rs/Cargo.toml @@ -1,13 +1,12 @@ [package] name = "codelist-validator-rs" version = "0.1.0" -edition = "2021" +edition = "2024" authors = ["Caroline Morton ", "Emma Bagshaw"] description = 
"Validation library for medical code lists" [dependencies] codelist-rs = { path = "../codelist-rs" } -serde = { version = "1.0", features = ["derive"] } thiserror = { version = "2.0.9" } thiserror-ext = { version = "0.2.1" } regex = "1.11.1" diff --git a/rust/codelist-validator-rs/src/ctv3_validator.rs b/rust/codelist-validator-rs/src/ctv3_validator.rs index 2556217..3e6fbf0 100644 --- a/rust/codelist-validator-rs/src/ctv3_validator.rs +++ b/rust/codelist-validator-rs/src/ctv3_validator.rs @@ -99,7 +99,10 @@ mod tests { let validator = Ctv3Validator(&codelist); let code: &'static str = "A009000000"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code A009000000 is an invalid length for type CTV3. Reason: Code is greater than 5 characters in length"); + assert_eq!( + error, + "Code A009000000 is an invalid length for type CTV3. Reason: Code is greater than 5 characters in length" + ); Ok(()) } @@ -109,7 +112,10 @@ mod tests { let validator = Ctv3Validator(&codelist); let code = "Af."; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code Af. is an invalid length for type CTV3. Reason: Code is less than 5 characters in length"); + assert_eq!( + error, + "Code Af. is an invalid length for type CTV3. Reason: Code is less than 5 characters in length" + ); Ok(()) } @@ -119,7 +125,10 @@ mod tests { let validator = Ctv3Validator(&codelist); let code = ".a009"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code .a009 contents is invalid for type CTV3. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code .a009 contents is invalid for type CTV3. 
Reason: Code does not match the expected format" + ); Ok(()) } @@ -130,7 +139,10 @@ mod tests { let validator = Ctv3Validator(&codelist); let code = "10a.f"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code 10a.f contents is invalid for type CTV3. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code 10a.f contents is invalid for type CTV3. Reason: Code does not match the expected format" + ); Ok(()) } @@ -140,7 +152,10 @@ mod tests { let validator = Ctv3Validator(&codelist); let code = "Af!!!"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code Af!!! contents is invalid for type CTV3. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code Af!!! contents is invalid for type CTV3. Reason: Code does not match the expected format" + ); Ok(()) } diff --git a/rust/codelist-validator-rs/src/errors.rs b/rust/codelist-validator-rs/src/errors.rs index 23b9861..a78a0e9 100644 --- a/rust/codelist-validator-rs/src/errors.rs +++ b/rust/codelist-validator-rs/src/errors.rs @@ -15,7 +15,9 @@ pub enum CodeListValidatorError { #[error("Code {code} is an invalid length for type {codelist_type}. Reason: {reason}")] InvalidCodeLength { code: String, reason: String, codelist_type: String }, - #[error("Code {code} is not composed of all numerical characters for type {codelist_type}. Reason: {reason}")] + #[error( + "Code {code} is not composed of all numerical characters for type {codelist_type}. 
Reason: {reason}" + )] #[construct(skip)] ParseIntError { code: String, reason: String, codelist_type: String }, diff --git a/rust/codelist-validator-rs/src/icd10_validator.rs b/rust/codelist-validator-rs/src/icd10_validator.rs index 93f3b38..c709b33 100644 --- a/rust/codelist-validator-rs/src/icd10_validator.rs +++ b/rust/codelist-validator-rs/src/icd10_validator.rs @@ -95,7 +95,10 @@ mod tests { let validator = IcdValidator(&codelist); let code = "A009000000"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code A009000000 is an invalid length for type ICD10. Reason: Code is greater than 7 characters in length"); + assert_eq!( + error, + "Code A009000000 is an invalid length for type ICD10. Reason: Code is greater than 7 characters in length" + ); Ok(()) } @@ -105,7 +108,10 @@ mod tests { let validator = IcdValidator(&codelist); let code = "1009"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code 1009 contents is invalid for type ICD10. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code 1009 contents is invalid for type ICD10. Reason: Code does not match the expected format" + ); Ok(()) } @@ -115,7 +121,10 @@ mod tests { let validator = IcdValidator(&codelist); let code = "AA09"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code AA09 contents is invalid for type ICD10. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code AA09 contents is invalid for type ICD10. Reason: Code does not match the expected format" + ); Ok(()) } @@ -125,18 +134,24 @@ mod tests { let validator = IcdValidator(&codelist); let code = "A0A9"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code A0A9 contents is invalid for type ICD10. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code A0A9 contents is invalid for type ICD10. 
Reason: Code does not match the expected format" + ); Ok(()) } #[test] - fn test_validate_invalid_code_fourth_character_not_a_dot_number_or_x( - ) -> Result<(), CodeListError> { + fn test_validate_invalid_code_fourth_character_not_a_dot_number_or_x() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = IcdValidator(&codelist); let code = "A00A"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code A00A contents is invalid for type ICD10. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code A00A contents is invalid for type ICD10. Reason: Code does not match the expected format" + ); Ok(()) } @@ -147,7 +162,10 @@ mod tests { let validator = IcdValidator(&codelist); let code = "A00.A"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code A00.A contents is invalid for type ICD10. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code A00.A contents is invalid for type ICD10. Reason: Code does not match the expected format" + ); Ok(()) } @@ -158,18 +176,24 @@ mod tests { let validator = IcdValidator(&codelist); let code = "A00X12"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code A00X12 contents is invalid for type ICD10. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code A00X12 contents is invalid for type ICD10. 
Reason: Code does not match the expected format" + ); Ok(()) } #[test] - fn test_validate_invalid_code_fifth_to_seventh_characters_not_numbers( - ) -> Result<(), CodeListError> { + fn test_validate_invalid_code_fifth_to_seventh_characters_not_numbers() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = IcdValidator(&codelist); let code = "A00.4AA"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code A00.4AA contents is invalid for type ICD10. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code A00.4AA contents is invalid for type ICD10. Reason: Code does not match the expected format" + ); Ok(()) } @@ -179,7 +203,10 @@ mod tests { let validator = IcdValidator(&codelist); let code = "a54"; let error = validator.validate_code(code).unwrap_err().to_string(); - assert_eq!(error, "Code a54 contents is invalid for type ICD10. Reason: Code does not match the expected format"); + assert_eq!( + error, + "Code a54 contents is invalid for type ICD10. Reason: Code does not match the expected format" + ); Ok(()) } diff --git a/rust/codelist-validator-rs/src/opcs_validator.rs b/rust/codelist-validator-rs/src/opcs_validator.rs index 679a8c6..533ad42 100644 --- a/rust/codelist-validator-rs/src/opcs_validator.rs +++ b/rust/codelist-validator-rs/src/opcs_validator.rs @@ -95,98 +95,122 @@ mod tests { } #[test] - fn test_validate_code_with_invalid_code_length_less_than_3_characters( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_length_less_than_3_characters() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = OpcsValidator(&codelist); let code = "A0"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code A0 is an invalid length for type OPCS. 
Reason: Code is less than 3 characters in length"); + assert_eq!( + error_string, + "Code A0 is an invalid length for type OPCS. Reason: Code is less than 3 characters in length" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_length_greater_than_5_characters( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_length_greater_than_5_characters() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = OpcsValidator(&codelist); let code = "A01000"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code A01000 is an invalid length for type OPCS. Reason: Code is greater than 5 characters in length"); + assert_eq!( + error_string, + "Code A01000 is an invalid length for type OPCS. Reason: Code is greater than 5 characters in length" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_first_character_not_a_letter( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_first_character_not_a_letter() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = OpcsValidator(&codelist); let code = "101"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code 101 contents is invalid for type OPCS. Reason: Code does not match the expected format"); + assert_eq!( + error_string, + "Code 101 contents is invalid for type OPCS. 
Reason: Code does not match the expected format" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_second_character_not_a_number( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_second_character_not_a_number() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = OpcsValidator(&codelist); let code = "AA1"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code AA1 contents is invalid for type OPCS. Reason: Code does not match the expected format"); + assert_eq!( + error_string, + "Code AA1 contents is invalid for type OPCS. Reason: Code does not match the expected format" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_third_character_not_a_number( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_third_character_not_a_number() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = OpcsValidator(&codelist); let code = "A0A"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code A0A contents is invalid for type OPCS. Reason: Code does not match the expected format"); + assert_eq!( + error_string, + "Code A0A contents is invalid for type OPCS. Reason: Code does not match the expected format" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_no_fifth_character_after_dot( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_no_fifth_character_after_dot() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = OpcsValidator(&codelist); let code = "A01."; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code A01. contents is invalid for type OPCS. 
Reason: Code does not match the expected format"); + assert_eq!( + error_string, + "Code A01. contents is invalid for type OPCS. Reason: Code does not match the expected format" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_fifth_character_after_dot_not_a_number( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_fifth_character_after_dot_not_a_number() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = OpcsValidator(&codelist); let code = "A01.A"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code A01.A contents is invalid for type OPCS. Reason: Code does not match the expected format"); + assert_eq!( + error_string, + "Code A01.A contents is invalid for type OPCS. Reason: Code does not match the expected format" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_fifth_character_not_a_number( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_fifth_character_not_a_number() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = OpcsValidator(&codelist); let code = "A010A"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code A010A contents is invalid for type OPCS. Reason: Code does not match the expected format"); + assert_eq!( + error_string, + "Code A010A contents is invalid for type OPCS. 
Reason: Code does not match the expected format" + ); Ok(()) } diff --git a/rust/codelist-validator-rs/src/snomed_validator.rs b/rust/codelist-validator-rs/src/snomed_validator.rs index c16cccf..0221f0c 100644 --- a/rust/codelist-validator-rs/src/snomed_validator.rs +++ b/rust/codelist-validator-rs/src/snomed_validator.rs @@ -81,55 +81,70 @@ mod tests { let code = "11A6BB789A"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code 11A6BB789A is not composed of all numerical characters for type SNOMED. Reason: invalid digit found in string"); + assert_eq!( + error_string, + "Code 11A6BB789A is not composed of all numerical characters for type SNOMED. Reason: invalid digit found in string" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_length_less_than_min_length_of_3( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_length_less_than_min_length_of_3() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = SnomedValidator(&codelist); let code = "11"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code 11 is an invalid length for type SNOMED. Reason: Code is not between 6 and 18 numbers in length"); + assert_eq!( + error_string, + "Code 11 is an invalid length for type SNOMED. 
Reason: Code is not between 6 and 18 numbers in length" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_length_greater_than_max_length_of_18( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_length_greater_than_max_length_of_18() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = SnomedValidator(&codelist); let code = "1111111111111111111111111111"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code 1111111111111111111111111111 is an invalid length for type SNOMED. Reason: Code is not between 6 and 18 numbers in length"); + assert_eq!( + error_string, + "Code 1111111111111111111111111111 is an invalid length for type SNOMED. Reason: Code is not between 6 and 18 numbers in length" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_length_less_than_default_min_length( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_length_less_than_default_min_length() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = SnomedValidator(&codelist); let code = "2043"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code 2043 is an invalid length for type SNOMED. Reason: Code is not between 6 and 18 numbers in length"); + assert_eq!( + error_string, + "Code 2043 is an invalid length for type SNOMED. 
Reason: Code is not between 6 and 18 numbers in length" + ); Ok(()) } #[test] - fn test_validate_code_with_invalid_code_length_greater_than_default_max_length( - ) -> Result<(), CodeListError> { + fn test_validate_code_with_invalid_code_length_greater_than_default_max_length() + -> Result<(), CodeListError> { let codelist = create_test_codelist()?; let validator = SnomedValidator(&codelist); let code = "2043510071234567890"; let error = validator.validate_code(code).unwrap_err(); let error_string = error.to_string(); - assert_eq!(error_string, "Code 2043510071234567890 is an invalid length for type SNOMED. Reason: Code is not between 6 and 18 numbers in length"); + assert_eq!( + error_string, + "Code 2043510071234567890 is an invalid length for type SNOMED. Reason: Code is not between 6 and 18 numbers in length" + ); Ok(()) } diff --git a/rust/codelist-validator-rs/src/validator.rs b/rust/codelist-validator-rs/src/validator.rs index 3939812..393603c 100644 --- a/rust/codelist-validator-rs/src/validator.rs +++ b/rust/codelist-validator-rs/src/validator.rs @@ -59,11 +59,7 @@ fn custom_validate_all_code(codelist: &CodeList, re: &Regex) -> Result<(), CodeL } } - if reasons.is_empty() { - Ok(()) - } else { - Err(CodeListValidatorError::invalid_codelist(reasons)) - } + if reasons.is_empty() { Ok(()) } else { Err(CodeListValidatorError::invalid_codelist(reasons)) } } #[cfg(test)]