diff --git a/rust/codelist-systems-rs/proptest-regressions/icd10.txt b/rust/codelist-systems-rs/proptest-regressions/icd10.txt new file mode 100644 index 0000000..368f3e7 --- /dev/null +++ b/rust/codelist-systems-rs/proptest-regressions/icd10.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc b8fd1ddd503d1efb257d26464f3b83c1cda33850554b076891a2cb0eedd14b9d # shrinks to prefix = "A꘠𜳰", illegal = "#" diff --git a/rust/codelist-systems-rs/proptest-regressions/opcs.txt b/rust/codelist-systems-rs/proptest-regressions/opcs.txt new file mode 100644 index 0000000..dea86e2 --- /dev/null +++ b/rust/codelist-systems-rs/proptest-regressions/opcs.txt @@ -0,0 +1,8 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 210afe1c3c37afae7f591bc2d085980e1b2fcfbdd7f60a0a026b00c204949e86 # shrinks to prefix = "A٠୦", trail = "!" +cc 8608455b24a86ddb96b487e0f62dea79e29e5f8a5081f34e991271198f340204 # shrinks to s = "A00.00" diff --git a/rust/codelist-systems-rs/proptest-regressions/snomed.txt b/rust/codelist-systems-rs/proptest-regressions/snomed.txt new file mode 100644 index 0000000..ee5e396 --- /dev/null +++ b/rust/codelist-systems-rs/proptest-regressions/snomed.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. 
+# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 72b50e46f8988e0f128a3c3245e0edc9bc82b82a2356438e8430077315aa6e98 # shrinks to s = "٠" diff --git a/rust/codelist-systems-rs/src/ctv3.rs b/rust/codelist-systems-rs/src/ctv3.rs new file mode 100644 index 0000000..f31467f --- /dev/null +++ b/rust/codelist-systems-rs/src/ctv3.rs @@ -0,0 +1,195 @@ +//! CTV3. +//! +//! Rules (preserved from the legacy `ctv3_validator`): +//! 1. Exactly 5 characters. +//! 2. Allowed characters: `a-z`, `A-Z`, `0-9`, and `.`. +//! 3. N alphanumeric characters followed by `5 - N` trailing dots (N = 0..=5). +//! +//! CTV3 is case-sensitive, so `normalize` trims whitespace only — +//! it does not fold case the way ICD10 and OPCS do. + +use std::sync::LazyLock; + +use codelist_rs::types::{Code, CodeSystemId, NormalizedCode}; +use regex::Regex; + +use crate::{ + core::CodingSystem, + errors::{SystemError, ValidationError}, +}; + +/// CTV3 coding system marker. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Ctv3; + +/// One alternative per N in rule 3: N ASCII alphanumerics followed by `5 - N` dots. +static CTV3_REGEX: LazyLock<Regex> = LazyLock::new(|| { + Regex::new( + r"^(?:[a-zA-Z0-9]{5}|[a-zA-Z0-9]{4}\.|[a-zA-Z0-9]{3}\.{2}|[a-zA-Z0-9]{2}\.{3}|[a-zA-Z0-9]\.{4}|\.{5})$", + ) + .expect("CTV3 regex compiles") +}); + +impl CodingSystem for Ctv3 { + const ID: CodeSystemId = CodeSystemId("CTV3"); + + fn normalize(code: &Code) -> Result<NormalizedCode, SystemError> { + // CTV3 is case-sensitive — no case folding, trim only.
+ let s = code.as_str().trim().to_string(); + if s.is_empty() { + return Err(SystemError::normalisation("CTV3", "empty after trim")); + } + Ok(NormalizedCode::from(s)) + } + + fn validate_syntax(code: &NormalizedCode) -> Result<(), ValidationError> { + let len = code.as_str().len(); + if len != 5 { + return Err(ValidationError::invalid_length( + code.as_str().to_string(), + "CTV3".to_string(), + format!("length must be exactly 5 (got {len})"), + )); + } + if !CTV3_REGEX.is_match(code.as_str()) { + return Err(ValidationError::invalid_contents( + code.as_str().to_string(), + "CTV3".to_string(), + "does not match expected format".to_string(), + )); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use codelist_rs::types::Code; + use proptest::prelude::*; + + use super::*; + + #[test] + fn valid_ctv3_codes_pass_syntax() { + for ok in ["Af918", "ABb..", "alkif", "F....", "bn89.", "Me...", "99999", "....."] { + let c = Code::from(ok); + let n = Ctv3::normalize(&c).unwrap(); + Ctv3::validate_syntax(&n).unwrap_or_else(|_| panic!("{ok} should pass")); + } + } + + #[test] + fn too_short_ctv3_codes_fail_with_invalid_length() { + for bad in ["Af.", "A00A", "10"] { + let c = Code::from(bad); + let n = Ctv3::normalize(&c).unwrap(); + let err = Ctv3::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidLength { .. }), + "{bad} should fail with InvalidLength" + ); + } + } + + #[test] + fn too_long_ctv3_codes_fail_with_invalid_length() { + for bad in ["A009000000", "9874ji", "Q90....."] { + let c = Code::from(bad); + let n = Ctv3::normalize(&c).unwrap(); + let err = Ctv3::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidLength { .. 
}), + "{bad} should fail with InvalidLength" + ); + } + } + + #[test] + fn bad_content_ctv3_codes_fail_with_invalid_contents() { + for bad in [".a009", "10a.f", "Af!!!", "A..9k", "..9jJ", "A00.l"] { + let c = Code::from(bad); + let n = Ctv3::normalize(&c).unwrap(); + let err = Ctv3::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidContents { .. }), + "{bad} should fail with InvalidContents" + ); + } + } + + #[test] + fn ctv3_normalize_preserves_case() { + // CTV3 is case-sensitive: normalize must not fold to uppercase. + // "Af918" must remain "Af918", not become "AF918" as ICD10/OPCS would. + let c = Code::from("Af918"); + let n = Ctv3::normalize(&c).unwrap(); + assert_eq!(n.as_str(), "Af918"); + } + + #[test] + fn ctv3_normalize_trims_whitespace() { + let c = Code::from(" Af918 "); + let n = Ctv3::normalize(&c).unwrap(); + assert_eq!(n.as_str(), "Af918"); + Ctv3::validate_syntax(&n).unwrap(); + } + + #[test] + fn ctv3_normalize_rejects_empty_after_trim() { + let c = Code::from(" "); + assert!(Ctv3::normalize(&c).is_err()); + } + + fn valid_ctv3() -> impl Strategy<Value = String> { + (0u32..=5).prop_flat_map(|n| { + proptest::string::string_regex(&format!("[a-zA-Z0-9]{{{n}}}")) + .unwrap() + .prop_map(move |s| format!("{s}{}", ".".repeat((5 - n) as usize))) + }) + } + + proptest! { + #[test] + fn valid_shape_ctv3_validates_ok(s in valid_ctv3()) { + let c = Code::from(s.as_str()); + let n = Ctv3::normalize(&c).unwrap(); + prop_assert!(Ctv3::validate_syntax(&n).is_ok()); + } + + #[test] + fn ctv3_disallowed_chars_fail_invalid_contents( + illegal in r"[!@#$%]", + suffix in r"[a-zA-Z0-9.]{4}", + ) { + let s = format!("{illegal}{suffix}"); + let c = Code::from(s.as_str()); + let n = Ctv3::normalize(&c).unwrap(); + let err = Ctv3::validate_syntax(&n).unwrap_err(); + let is_invalid_contents = matches!(err, ValidationError::InvalidContents { ..
}); + prop_assert!(is_invalid_contents); + } + + #[test] + fn ctv3_out_of_range_length_fails_invalid_length( + s in prop_oneof![r"[a-zA-Z0-9.]{1,4}", r"[a-zA-Z0-9.]{6,12}"], + ) { + let c = Code::from(s.as_str()); + let n = Ctv3::normalize(&c).unwrap(); + let err = Ctv3::validate_syntax(&n).unwrap_err(); + let is_invalid_length = matches!(err, ValidationError::InvalidLength { .. }); + prop_assert!(is_invalid_length); + } + + #[test] + fn ctv3_trim_idempotent( + s in valid_ctv3(), + left in 0usize..5, + right in 0usize..5, + ) { + let padded = format!("{}{s}{}", " ".repeat(left), " ".repeat(right)); + let base = Ctv3::normalize(&Code::from(s.as_str())).unwrap(); + let pad = Ctv3::normalize(&Code::from(padded.as_str())).unwrap(); + prop_assert_eq!(base.as_str(), pad.as_str()); + } + } +} diff --git a/rust/codelist-systems-rs/src/icd10.rs b/rust/codelist-systems-rs/src/icd10.rs index 3c404c1..ad1d541 100644 --- a/rust/codelist-systems-rs/src/icd10.rs +++ b/rust/codelist-systems-rs/src/icd10.rs @@ -78,3 +78,140 @@ impl XExtensible for Icd10 { NormalizedCode::from(format!("{}X", code.as_str())) } } + +#[cfg(test)] +mod tests { + use codelist_rs::types::Code; + use proptest::prelude::*; + + use super::*; + + #[test] + fn valid_icd10_codes_pass_syntax() { + for ok in ["A54", "A37", "A05", "B74.0", "N40", "M10", "Q90", "K02"] { + let c = Code::from(ok); + let n = Icd10::normalize(&c).unwrap(); + Icd10::validate_syntax(&n).unwrap_or_else(|_| panic!("{ok} should pass")); + } + } + + #[test] + fn invalid_icd10_codes_fail_syntax() { + // Previously-invalid codes from the existing ICD10 validator test-suite. + // Note: "a54" was invalid in the old validator (rejected lowercase), but is + // now valid because `normalize` uppercases before `validate_syntax` runs. + // That's the intended design change. Hence it's not in this list. 
+ for bad in ["A009000000", "1009", "AA09", "A0A9", "A00A", "A00.A", "A00X12", "A00.4AA"] { + let c = Code::from(bad); + let n = Icd10::normalize(&c).unwrap(); + assert!(Icd10::validate_syntax(&n).is_err(), "{bad} should fail"); + } + } + + #[test] + fn too_long_icd10_codes_report_length_error() { + let c = Code::from("A009000000"); + let n = Icd10::normalize(&c).unwrap(); + let err = Icd10::validate_syntax(&n).unwrap_err(); + assert!(matches!(err, crate::errors::ValidationError::InvalidLength { .. })); + } + + #[test] + fn icd10_normalize_upper_cases_and_trims() { + let c = Code::from(" a54 "); + let n = Icd10::normalize(&c).unwrap(); + assert_eq!(n.as_str(), "A54"); + Icd10::validate_syntax(&n).unwrap(); + } + + #[test] + fn icd10_normalize_rejects_empty_code() { + let c = Code::from(" "); + assert!(Icd10::normalize(&c).is_err()); + } + + fn valid_icd10() -> &'static str { + r"[A-Z][0-9]{2}(X|(\.[0-9]{1,3})?|[0-9]{1,4})?" + } + + proptest! { + #[test] + fn arbitrary_strings_match_regex_iff_validate_ok(s in "[A-Za-z0-9. X]{0,10}") { + let c = Code::from(s.as_str()); + let Ok(n) = Icd10::normalize(&c) else { return Ok(()); }; + let regex_ok = regex::Regex::new(r"^[A-Z]\d{2}(X|(\.\d{1,3})?|\d{1,4})?$") + .unwrap() + .is_match(n.as_str()) && n.as_str().len() <= 7; + let validate_ok = Icd10::validate_syntax(&n).is_ok(); + prop_assert_eq!(regex_ok, validate_ok); + } + + #[test] + fn valid_shape_icd10_validates_ok(s in valid_icd10()) { + let c = Code::from(s.as_str()); + let n = Icd10::normalize(&c).unwrap(); + prop_assert!(Icd10::validate_syntax(&n).is_ok()); + } + + #[test] + fn icd10_disallowed_chars_fail_invalid_contents( + prefix in r"[A-Z][0-9]{2}", + illegal in r"[!@#$%]", + ) { + let s = format!("{prefix}{illegal}"); + let c = Code::from(s.as_str()); + let n = Icd10::normalize(&c).unwrap(); + let err = Icd10::validate_syntax(&n).unwrap_err(); + let is_invalid_contents = matches!(err, ValidationError::InvalidContents { .. 
}); + prop_assert!(is_invalid_contents); + } + + #[test] + fn icd10_out_of_range_length_fails_invalid_length(s in r"[A-Z0-9.X]{8,15}") { + let c = Code::from(s.as_str()); + let n = Icd10::normalize(&c).unwrap(); + let err = Icd10::validate_syntax(&n).unwrap_err(); + let is_invalid_length = matches!(err, ValidationError::InvalidLength { .. }); + prop_assert!(is_invalid_length); + } + + #[test] + fn icd10_trim_idempotent( + s in valid_icd10(), + left in 0usize..5, + right in 0usize..5, + ) { + let padded = format!("{}{s}{}", " ".repeat(left), " ".repeat(right)); + let base = Icd10::normalize(&Code::from(s.as_str())).unwrap(); + let pad = Icd10::normalize(&Code::from(padded.as_str())).unwrap(); + prop_assert_eq!(base.as_str(), pad.as_str()); + } + } + + #[test] + fn icd10_is_truncatable_when_longer_than_three() { + let n = Icd10::normalize(&Code::from("A00.4")).unwrap(); + assert!(Icd10::is_truncatable(&n)); + let short = Icd10::normalize(&Code::from("A00")).unwrap(); + assert!(!Icd10::is_truncatable(&short)); + } + + #[test] + fn icd10_truncate_to_three_chars() { + let n = Icd10::normalize(&Code::from("A00.4")).unwrap(); + let t = Icd10::truncate(&n); + assert_eq!(t.as_str(), "A00"); + } + + #[test] + fn icd10_is_x_addable_for_three_char_codes() { + let n = Icd10::normalize(&Code::from("A00")).unwrap(); + assert!(Icd10::is_x_addable(&n)); + } + + #[test] + fn icd10_add_x_appends_x() { + let n = Icd10::normalize(&Code::from("A00")).unwrap(); + assert_eq!(Icd10::add_x(&n).as_str(), "A00X"); + } +} diff --git a/rust/codelist-systems-rs/src/lib.rs b/rust/codelist-systems-rs/src/lib.rs index 1a04eb6..a96c490 100644 --- a/rust/codelist-systems-rs/src/lib.rs +++ b/rust/codelist-systems-rs/src/lib.rs @@ -6,8 +6,11 @@ pub mod capabilities; pub mod core; +pub mod ctv3; pub mod errors; pub mod icd10; +pub mod opcs; +pub mod snomed; pub use crate::{ capabilities::{Truncatable, XExtensible}, diff --git a/rust/codelist-systems-rs/src/opcs.rs b/rust/codelist-systems-rs/src/opcs.rs 
new file mode 100644 index 0000000..ada6ac4 --- /dev/null +++ b/rust/codelist-systems-rs/src/opcs.rs @@ -0,0 +1,192 @@ +//! OPCS. +//! +//! Rules (preserved from the legacy `opcs_validator`): +//! 1. Length 3–5. +//! 2. First character is a letter (upper case after normalisation). +//! 3. Second and third characters are ASCII digits (`0-9`). +//! 4. Fourth character, if present, is `.` or a digit. +//! 5. If fourth is `.`, a digit must follow. +//! 6. Fifth character, if present, is a digit. + +use std::sync::LazyLock; + +use codelist_rs::types::{Code, CodeSystemId, NormalizedCode}; +use regex::Regex; + +use crate::{ + core::CodingSystem, + errors::{SystemError, ValidationError}, +}; + +/// OPCS coding system marker. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Opcs; + +/// Shape check for a normalised OPCS code. The digit classes are spelled `[0-9]` on purpose: +/// the `regex` crate's `\d` is Unicode-aware and would let non-ASCII digits through. +static OPCS_REGEX: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(r"^[A-Z][0-9]{2}(\.[0-9]{1,2}|[0-9]{1,2})?$").expect("OPCS regex compiles") +}); + +impl CodingSystem for Opcs { + const ID: CodeSystemId = CodeSystemId("OPCS"); + + fn normalize(code: &Code) -> Result<NormalizedCode, SystemError> { + let s = code.as_str().trim().to_ascii_uppercase(); + if s.is_empty() { + return Err(SystemError::normalisation("OPCS", "empty after trim")); + } + Ok(NormalizedCode::from(s)) + } + + fn validate_syntax(code: &NormalizedCode) -> Result<(), ValidationError> { + let len = code.as_str().len(); + if !(3..=5).contains(&len) { + return Err(ValidationError::invalid_length( + code.as_str().to_string(), + "OPCS".to_string(), + format!("length must be between 3 and 5 (got {len})"), + )); + } + if !OPCS_REGEX.is_match(code.as_str()) { + return Err(ValidationError::invalid_contents( + code.as_str().to_string(), + "OPCS".to_string(), + "does not match expected format".to_string(), + )); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use codelist_rs::types::Code; + use proptest::prelude::*; + + use super::*; + + #[test] + fn valid_opcs_codes_pass_syntax() { + for ok in ["C01", "L31.4", "L35.3", "L47.4", "A01", "Z94.2", "K40.1", "B201"] { + let c =
Code::from(ok); + let n = Opcs::normalize(&c).unwrap(); + Opcs::validate_syntax(&n).unwrap_or_else(|_| panic!("{ok} should pass")); + } + } + + #[test] + fn too_short_opcs_codes_fail_with_invalid_length() { + for bad in ["A0", "A", "B"] { + let c = Code::from(bad); + let n = Opcs::normalize(&c).unwrap(); + let err = Opcs::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidLength { .. }), + "{bad} should fail with InvalidLength" + ); + } + } + + #[test] + fn too_long_opcs_codes_fail_with_invalid_length() { + for bad in ["A01000", "B123456"] { + let c = Code::from(bad); + let n = Opcs::normalize(&c).unwrap(); + let err = Opcs::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidLength { .. }), + "{bad} should fail with InvalidLength" + ); + } + } + + #[test] + fn bad_content_opcs_codes_fail_with_invalid_contents() { + for bad in ["101", "AA1", "A0A", "A01.", "A01.A", "A010A"] { + let c = Code::from(bad); + let n = Opcs::normalize(&c).unwrap(); + let err = Opcs::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidContents { .. }), + "{bad} should fail with InvalidContents" + ); + } + } + + #[test] + fn opcs_normalize_trims_whitespace_and_uppercases() { + // Intentional behaviour change over the legacy validator: lowercase input + // is accepted and normalised to uppercase, so " a01 " becomes "A01". + let c = Code::from(" a01 "); + let n = Opcs::normalize(&c).unwrap(); + assert_eq!(n.as_str(), "A01"); + Opcs::validate_syntax(&n).unwrap(); + } + + #[test] + fn opcs_normalize_rejects_empty_after_trim() { + let c = Code::from(" "); + assert!(Opcs::normalize(&c).is_err()); + } + + proptest! 
{ + #[test] + fn valid_shape_opcs_validates_ok( + s in prop_oneof![ + // 3 chars: [A-Z][0-9]{2} + r"[A-Z][0-9]{2}", + // 4 chars: [A-Z][0-9]{3} + r"[A-Z][0-9]{3}", + // 5 chars: [A-Z][0-9]{2}.[0-9] or [A-Z][0-9]{4} + r"[A-Z][0-9]{2}\.[0-9]", + r"[A-Z][0-9]{4}", + ], + ) { + let c = Code::from(s.as_str()); + let n = Opcs::normalize(&c).unwrap(); + prop_assert!(Opcs::validate_syntax(&n).is_ok()); + } + + #[test] + fn opcs_disallowed_chars_fail_invalid_contents( + prefix in r"[A-Z][0-9]{2}", + trail in r"[!@#$%]", + ) { + let s = format!("{prefix}{trail}"); + let c = Code::from(s.as_str()); + let n = Opcs::normalize(&c).unwrap(); + let err = Opcs::validate_syntax(&n).unwrap_err(); + let is_invalid_contents = matches!(err, ValidationError::InvalidContents { .. }); + prop_assert!(is_invalid_contents); + } + + #[test] + fn opcs_out_of_range_length_fails_invalid_length( + s in prop_oneof![r"[A-Z0-9.]{1,2}", r"[A-Z0-9.]{6,12}"], + ) { + let c = Code::from(s.as_str()); + let n = Opcs::normalize(&c).unwrap(); + let err = Opcs::validate_syntax(&n).unwrap_err(); + let is_invalid_length = matches!(err, ValidationError::InvalidLength { .. }); + prop_assert!(is_invalid_length); + } + + #[test] + fn opcs_trim_idempotent( + s in prop_oneof![ + r"[A-Z][0-9]{2}", + r"[A-Z][0-9]{3}", + r"[A-Z][0-9]{2}\.[0-9]", + r"[A-Z][0-9]{4}", + ], + left in 0usize..5, + right in 0usize..5, + ) { + let padded = format!("{}{s}{}", " ".repeat(left), " ".repeat(right)); + let base = Opcs::normalize(&Code::from(s.as_str())).unwrap(); + let pad = Opcs::normalize(&Code::from(padded.as_str())).unwrap(); + prop_assert_eq!(base.as_str(), pad.as_str()); + } + } +} diff --git a/rust/codelist-systems-rs/src/snomed.rs b/rust/codelist-systems-rs/src/snomed.rs new file mode 100644 index 0000000..84b74b2 --- /dev/null +++ b/rust/codelist-systems-rs/src/snomed.rs @@ -0,0 +1,164 @@ +//! SNOMED CT coding system. +//! +//! Rules (preserved from the legacy `snomed_validator`): +//! 1. The code consists of digits only. 
+//! 2. The code must be between 6 and 18 digits in length (inclusive). + +use codelist_rs::types::{Code, CodeSystemId, NormalizedCode}; + +use crate::{ + core::CodingSystem, + errors::{SystemError, ValidationError}, +}; + +/// SNOMED CT coding system marker. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Snomed; + +impl CodingSystem for Snomed { + const ID: CodeSystemId = CodeSystemId("SNOMED"); + + fn normalize(code: &Code) -> Result<NormalizedCode, SystemError> { + let s = code.as_str().trim().to_string(); + if s.is_empty() { + return Err(SystemError::normalisation("SNOMED", "empty after trim")); + } + Ok(NormalizedCode::from(s)) + } + + fn validate_syntax(code: &NormalizedCode) -> Result<(), ValidationError> { + let s = code.as_str(); + if !s.chars().all(|c| c.is_ascii_digit()) { + return Err(ValidationError::invalid_contents( + s.to_string(), + "SNOMED".to_string(), + "code must consist of digits only".to_string(), + )); + } + let len = s.len(); + if !(6..=18).contains(&len) { + return Err(ValidationError::invalid_length( + s.to_string(), + "SNOMED".to_string(), + "length must be between 6 and 18 digits".to_string(), + )); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use codelist_rs::types::Code; + use proptest::prelude::*; + + use super::*; + + #[test] + fn valid_snomed_codes_pass_syntax() { + for ok in ["204351007", "405752007", "77480004", "34000006", "24700007", "398254007"] { + let c = Code::from(ok); + let n = Snomed::normalize(&c).unwrap(); + Snomed::validate_syntax(&n).unwrap_or_else(|_| panic!("{ok} should pass")); + } + } + + #[test] + fn too_short_snomed_codes_fail_syntax() { + for bad in ["11", "11111", "2043"] { + let c = Code::from(bad); + let n = Snomed::normalize(&c).unwrap(); + let err = Snomed::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidLength { ..
}), + "{bad} should fail with InvalidLength" + ); + } + } + + #[test] + fn too_long_snomed_codes_fail_syntax() { + for bad in ["1111111111111111111111111111", "9999999999999999999"] { + let c = Code::from(bad); + let n = Snomed::normalize(&c).unwrap(); + let err = Snomed::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidLength { .. }), + "{bad} should fail with InvalidLength" + ); + } + } + + #[test] + fn non_numeric_snomed_codes_fail_syntax() { + for bad in ["AA0901", "11A6BB789A", "ABC123DEF"] { + let c = Code::from(bad); + let n = Snomed::normalize(&c).unwrap(); + let err = Snomed::validate_syntax(&n).unwrap_err(); + assert!( + matches!(err, crate::errors::ValidationError::InvalidContents { .. }), + "{bad} should fail with InvalidContents" + ); + } + } + + #[test] + fn snomed_normalize_trims_whitespace() { + let c = Code::from(" 204351007 "); + let n = Snomed::normalize(&c).unwrap(); + assert_eq!(n.as_str(), "204351007"); + Snomed::validate_syntax(&n).unwrap(); + } + + #[test] + fn snomed_normalize_rejects_empty_code() { + let c = Code::from(" "); + assert!(Snomed::normalize(&c).is_err()); + } + + proptest! { + #[test] + fn valid_shape_snomed_validates_ok(s in r"[0-9]{6,18}") { + let c = Code::from(s.as_str()); + let n = Snomed::normalize(&c).unwrap(); + prop_assert!(Snomed::validate_syntax(&n).is_ok()); + } + + #[test] + fn snomed_disallowed_chars_fail_invalid_contents( + a in r"[0-9]*", + b in r"[!@#$%a-zA-Z]+", + c in r"[0-9]*", + ) { + let s = format!("{a}{b}{c}"); + let code = Code::from(s.as_str()); + let n = Snomed::normalize(&code).unwrap(); + let err = Snomed::validate_syntax(&n).unwrap_err(); + let is_invalid_contents = matches!(err, ValidationError::InvalidContents { .. 
}); + prop_assert!(is_invalid_contents); + } + + #[test] + fn snomed_out_of_range_length_digits_fail_invalid_length( + s in prop_oneof![r"[0-9]{1,5}", r"[0-9]{19,25}"], + ) { + let code = Code::from(s.as_str()); + let n = Snomed::normalize(&code).unwrap(); + let err = Snomed::validate_syntax(&n).unwrap_err(); + let is_invalid_length = matches!(err, ValidationError::InvalidLength { .. }); + prop_assert!(is_invalid_length); + } + + #[test] + fn snomed_trim_idempotent( + s in r"[0-9]{6,18}", + left in 0usize..5, + right in 0usize..5, + ) { + let padded = format!("{}{s}{}", " ".repeat(left), " ".repeat(right)); + let base = Snomed::normalize(&Code::from(s.as_str())).unwrap(); + let pad = Snomed::normalize(&Code::from(padded.as_str())).unwrap(); + prop_assert_eq!(base.as_str(), pad.as_str()); + } + } +} diff --git a/rust/codelist-systems-rs/tests/icd10_rules.rs b/rust/codelist-systems-rs/tests/icd10_rules.rs deleted file mode 100644 index e87e639..0000000 --- a/rust/codelist-systems-rs/tests/icd10_rules.rs +++ /dev/null @@ -1,90 +0,0 @@ -use codelist_rs::types::Code; -use codelist_systems_rs::{CodingSystem, icd10::Icd10}; - -#[test] -fn valid_icd10_codes_pass_syntax() { - for ok in ["A54", "A37", "A05", "B74.0", "N40", "M10", "Q90", "K02"] { - let c = Code::from(ok); - let n = Icd10::normalize(&c).unwrap(); - Icd10::validate_syntax(&n).unwrap_or_else(|_| panic!("{ok} should pass")); - } -} - -#[test] -fn invalid_icd10_codes_fail_syntax() { - // Previously-invalid codes from the existing ICD10 validator test-suite. - // Note: "a54" was invalid in the old validator (rejected lowercase), but is - // now valid because `normalize` uppercases before `validate_syntax` runs. - // That's the intended design change. Hence it's not in this list. 
- for bad in ["A009000000", "1009", "AA09", "A0A9", "A00A", "A00.A", "A00X12", "A00.4AA"] { - let c = Code::from(bad); - let n = Icd10::normalize(&c).unwrap(); - assert!(Icd10::validate_syntax(&n).is_err(), "{bad} should fail"); - } -} - -#[test] -fn too_long_icd10_codes_report_length_error() { - let c = Code::from("A009000000"); - let n = Icd10::normalize(&c).unwrap(); - let err = Icd10::validate_syntax(&n).unwrap_err(); - assert!(matches!(err, codelist_systems_rs::ValidationError::InvalidLength { .. })); -} - -#[test] -fn icd10_normalize_upper_cases_and_trims() { - let c = Code::from(" a54 "); - let n = Icd10::normalize(&c).unwrap(); - assert_eq!(n.as_str(), "A54"); - Icd10::validate_syntax(&n).unwrap(); -} - -#[test] -fn icd10_normalize_rejects_empty_code() { - let c = Code::from(" "); - assert!(Icd10::normalize(&c).is_err()); -} - -use proptest::prelude::*; - -proptest! { - #[test] - fn arbitrary_strings_match_regex_iff_validate_ok(s in "[A-Za-z0-9. X]{0,10}") { - let c = Code::from(s.as_str()); - let Ok(n) = Icd10::normalize(&c) else { return Ok(()); }; - let regex_ok = regex::Regex::new(r"^[A-Z]\d{2}(X|(\.\d{1,3})?|\d{1,4})?$") - .unwrap() - .is_match(n.as_str()) && n.as_str().len() <= 7; - let validate_ok = Icd10::validate_syntax(&n).is_ok(); - prop_assert_eq!(regex_ok, validate_ok); - } -} - -use codelist_systems_rs::{Truncatable, XExtensible}; - -#[test] -fn icd10_is_truncatable_when_longer_than_three() { - let n = Icd10::normalize(&Code::from("A00.4")).unwrap(); - assert!(Icd10::is_truncatable(&n)); - let short = Icd10::normalize(&Code::from("A00")).unwrap(); - assert!(!Icd10::is_truncatable(&short)); -} - -#[test] -fn icd10_truncate_to_three_chars() { - let n = Icd10::normalize(&Code::from("A00.4")).unwrap(); - let t = Icd10::truncate(&n); - assert_eq!(t.as_str(), "A00"); -} - -#[test] -fn icd10_is_x_addable_for_three_char_codes() { - let n = Icd10::normalize(&Code::from("A00")).unwrap(); - assert!(Icd10::is_x_addable(&n)); -} - -#[test] -fn 
icd10_add_x_appends_x() { - let n = Icd10::normalize(&Code::from("A00")).unwrap(); - assert_eq!(Icd10::add_x(&n).as_str(), "A00X"); -}