Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions rust/codelist-systems-rs/proptest-regressions/icd10.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc b8fd1ddd503d1efb257d26464f3b83c1cda33850554b076891a2cb0eedd14b9d # shrinks to prefix = "A꘠𜳰", illegal = "#"
8 changes: 8 additions & 0 deletions rust/codelist-systems-rs/proptest-regressions/opcs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 210afe1c3c37afae7f591bc2d085980e1b2fcfbdd7f60a0a026b00c204949e86 # shrinks to prefix = "A٠୦", trail = "!"
cc 8608455b24a86ddb96b487e0f62dea79e29e5f8a5081f34e991271198f340204 # shrinks to s = "A00.00"
7 changes: 7 additions & 0 deletions rust/codelist-systems-rs/proptest-regressions/snomed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 72b50e46f8988e0f128a3c3245e0edc9bc82b82a2356438e8430077315aa6e98 # shrinks to s = "٠"
194 changes: 194 additions & 0 deletions rust/codelist-systems-rs/src/ctv3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
//! CTV3.
//!
//! Rules (preserved from the legacy `ctv3_validator`):
//! 1. Exactly 5 characters.
//! 2. Allowed characters: `a-z`, `A-Z`, `0-9`, and `.`.
//! 3. N alphanumeric characters followed by `5 - N` trailing dots (N = 0..=5).
//!
//! CTV3 is case-sensitive, so `normalize` trims whitespace only —
//! it does not fold case the way ICD10 and OPCS do.

use std::sync::LazyLock;

use codelist_rs::types::{Code, CodeSystemId, NormalizedCode};
use regex::Regex;

use crate::{
core::CodingSystem,
errors::{SystemError, ValidationError},
};

/// CTV3 coding system marker.
///
/// A field-less unit struct: it carries no data and exists only as the type
/// that the [`CodingSystem`] implementation for CTV3 is attached to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Ctv3;

/// Lazily compiled pattern describing the accepted CTV3 code shapes.
///
/// The pattern is anchored with `^`/`$` and lists one alternative per split of
/// a five-character code: N characters from `[a-zA-Z0-9]` followed by `5 - N`
/// literal dots, for N = 5 down to 1, plus the all-dots form `.....`.
/// The `expect` can only fire if this hard-coded pattern fails to compile.
static CTV3_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"^(?:[a-zA-Z0-9]{5}|[a-zA-Z0-9]{4}\.|[a-zA-Z0-9]{3}\.{2}|[a-zA-Z0-9]{2}\.{3}|[a-zA-Z0-9]\.{4}|\.{5})$",
)
.expect("CTV3 regex compiles")
});

impl CodingSystem for Ctv3 {
    const ID: CodeSystemId = CodeSystemId("CTV3");

    /// Trims surrounding whitespace from `code`, leaving letter case untouched.
    ///
    /// # Errors
    ///
    /// Returns a normalisation [`SystemError`] when nothing is left after
    /// trimming.
    fn normalize(code: &Code) -> Result<NormalizedCode, SystemError> {
        // CTV3 is case-sensitive — no case folding, trim only.
        let trimmed = code.as_str().trim();
        if trimmed.is_empty() {
            return Err(SystemError::normalisation("CTV3", "empty after trim"));
        }
        Ok(NormalizedCode::from(trimmed.to_string()))
    }

    /// Checks that `code` is exactly five characters long and matches
    /// `CTV3_REGEX`.
    ///
    /// # Errors
    ///
    /// * An invalid-length [`ValidationError`] when the code does not contain
    ///   exactly five characters.
    /// * An invalid-contents [`ValidationError`] when the code has five
    ///   characters but does not match the pattern.
    fn validate_syntax(code: &NormalizedCode) -> Result<(), ValidationError> {
        let raw = code.as_str();
        // Count characters rather than bytes: `str::len` reports UTF-8 bytes,
        // which would mis-measure any code containing a multi-byte character
        // and put a misleading number in the error message.
        let len = raw.chars().count();
        if len != 5 {
            return Err(ValidationError::invalid_length(
                raw.to_string(),
                "CTV3".to_string(),
                format!("length must be exactly 5 (got {len})"),
            ));
        }
        if !CTV3_REGEX.is_match(raw) {
            return Err(ValidationError::invalid_contents(
                raw.to_string(),
                "CTV3".to_string(),
                "does not match expected format".to_string(),
            ));
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use codelist_rs::types::Code;
    use proptest::prelude::*;

    use super::*;

    /// Normalizes `raw` and runs CTV3 syntax validation on the result.
    ///
    /// Panics if normalization itself fails, so it is only meant for inputs
    /// that are non-empty after trimming.
    fn check(raw: &str) -> Result<(), ValidationError> {
        let normalized = Ctv3::normalize(&Code::from(raw)).unwrap();
        Ctv3::validate_syntax(&normalized)
    }

    #[test]
    fn valid_ctv3_codes_pass_syntax() {
        for ok in ["Af918", "ABb..", "alkif", "F....", "bn89.", "Me...", "99999", "....."] {
            assert!(check(ok).is_ok(), "{ok} should pass");
        }
    }

    #[test]
    fn too_short_ctv3_codes_fail_with_invalid_length() {
        for bad in ["Af.", "A00A", "10"] {
            assert!(
                matches!(check(bad), Err(ValidationError::InvalidLength { .. })),
                "{bad} should fail with InvalidLength"
            );
        }
    }

    #[test]
    fn too_long_ctv3_codes_fail_with_invalid_length() {
        for bad in ["A009000000", "9874ji", "Q90....."] {
            assert!(
                matches!(check(bad), Err(ValidationError::InvalidLength { .. })),
                "{bad} should fail with InvalidLength"
            );
        }
    }

    #[test]
    fn bad_content_ctv3_codes_fail_with_invalid_contents() {
        for bad in [".a009", "10a.f", "Af!!!", "A..9k", "..9jJ", "A00.l"] {
            assert!(
                matches!(check(bad), Err(ValidationError::InvalidContents { .. })),
                "{bad} should fail with InvalidContents"
            );
        }
    }

    #[test]
    fn non_ascii_ctv3_codes_fail_syntax() {
        // Multi-byte characters are never valid CTV3, whatever their byte
        // length happens to be. Only rejection is asserted here, not the
        // specific error kind.
        for bad in ["ééééé", "Af91é", "éé.", "A٠..."] {
            assert!(check(bad).is_err(), "{bad} should fail");
        }
    }

    #[test]
    fn ctv3_normalize_preserves_case() {
        // CTV3 is case-sensitive: normalize must not fold to uppercase.
        // "Af918" must remain "Af918", not become "AF918" as ICD10/OPCS would.
        let n = Ctv3::normalize(&Code::from("Af918")).unwrap();
        assert_eq!(n.as_str(), "Af918");
    }

    #[test]
    fn ctv3_normalize_trims_whitespace() {
        let n = Ctv3::normalize(&Code::from(" Af918 ")).unwrap();
        assert_eq!(n.as_str(), "Af918");
        Ctv3::validate_syntax(&n).unwrap();
    }

    #[test]
    fn ctv3_normalize_rejects_empty_after_trim() {
        assert!(Ctv3::normalize(&Code::from(" ")).is_err());
    }

    /// Strategy producing well-formed CTV3 codes: `n` alphanumerics followed
    /// by `5 - n` dots, for every `n` in `0..=5`.
    fn valid_ctv3() -> impl Strategy<Value = String> {
        (0usize..=5).prop_flat_map(|n| {
            proptest::string::string_regex(&format!("[a-zA-Z0-9]{{{n}}}"))
                .unwrap()
                .prop_map(move |s| format!("{s}{}", ".".repeat(5 - n)))
        })
    }

    proptest! {
        #[test]
        fn valid_shape_ctv3_validates_ok(s in valid_ctv3()) {
            prop_assert!(check(&s).is_ok());
        }

        #[test]
        fn ctv3_disallowed_chars_fail_invalid_contents(
            illegal in r"[!@#$%]",
            suffix in r"[a-zA-Z0-9.]{4}",
        ) {
            let outcome = check(&format!("{illegal}{suffix}"));
            // Bound to a local first so the pattern's braces are not passed
            // through `prop_assert!` directly.
            let is_invalid_contents =
                matches!(outcome, Err(ValidationError::InvalidContents { .. }));
            prop_assert!(is_invalid_contents);
        }

        #[test]
        fn ctv3_out_of_range_length_fails_invalid_length(
            s in prop_oneof![r"[a-zA-Z0-9.]{1,4}", r"[a-zA-Z0-9.]{6,12}"],
        ) {
            let outcome = check(&s);
            let is_invalid_length =
                matches!(outcome, Err(ValidationError::InvalidLength { .. }));
            prop_assert!(is_invalid_length);
        }

        #[test]
        fn ctv3_trim_idempotent(
            s in valid_ctv3(),
            left in 0usize..5,
            right in 0usize..5,
        ) {
            let padded = format!("{}{s}{}", " ".repeat(left), " ".repeat(right));
            let base = Ctv3::normalize(&Code::from(s.as_str())).unwrap();
            let pad = Ctv3::normalize(&Code::from(padded.as_str())).unwrap();
            prop_assert_eq!(base.as_str(), pad.as_str());
        }
    }
}
137 changes: 137 additions & 0 deletions rust/codelist-systems-rs/src/icd10.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,140 @@ impl XExtensible for Icd10 {
NormalizedCode::from(format!("{}X", code.as_str()))
}
}

#[cfg(test)]
mod tests {
    use codelist_rs::types::Code;
    use proptest::prelude::*;

    use super::*;

    /// Unanchored regex source for well-formed ICD10 codes.
    ///
    /// Uses explicit ASCII classes (`[0-9]`, not `\d`) so that neither the
    /// generator nor the oracle below ever admits non-ASCII digits.
    fn valid_icd10() -> &'static str {
        r"[A-Z][0-9]{2}(X|(\.[0-9]{1,3})?|[0-9]{1,4})?"
    }

    /// Anchored oracle built from [`valid_icd10`], compiled once for the whole
    /// test run instead of once per generated proptest case.
    static ICD10_ORACLE: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
        regex::Regex::new(&format!("^(?:{})$", valid_icd10()))
            .expect("ICD10 oracle regex compiles")
    });

    /// Normalizes `raw`, panicking if normalization fails.
    fn normalized(raw: &str) -> NormalizedCode {
        Icd10::normalize(&Code::from(raw)).unwrap()
    }

    #[test]
    fn valid_icd10_codes_pass_syntax() {
        for ok in ["A54", "A37", "A05", "B74.0", "N40", "M10", "Q90", "K02"] {
            assert!(Icd10::validate_syntax(&normalized(ok)).is_ok(), "{ok} should pass");
        }
    }

    #[test]
    fn invalid_icd10_codes_fail_syntax() {
        // Previously-invalid codes from the existing ICD10 validator test-suite.
        // Note: "a54" was invalid in the old validator (rejected lowercase), but is
        // now valid because `normalize` uppercases before `validate_syntax` runs.
        // That's the intended design change. Hence it's not in this list.
        for bad in ["A009000000", "1009", "AA09", "A0A9", "A00A", "A00.A", "A00X12", "A00.4AA"] {
            assert!(Icd10::validate_syntax(&normalized(bad)).is_err(), "{bad} should fail");
        }
    }

    #[test]
    fn too_long_icd10_codes_report_length_error() {
        let err = Icd10::validate_syntax(&normalized("A009000000")).unwrap_err();
        assert!(matches!(err, ValidationError::InvalidLength { .. }));
    }

    #[test]
    fn icd10_normalize_upper_cases_and_trims() {
        let n = normalized(" a54 ");
        assert_eq!(n.as_str(), "A54");
        Icd10::validate_syntax(&n).unwrap();
    }

    #[test]
    fn icd10_normalize_rejects_empty_code() {
        assert!(Icd10::normalize(&Code::from(" ")).is_err());
    }

    #[test]
    fn icd10_is_truncatable_when_longer_than_three() {
        assert!(Icd10::is_truncatable(&normalized("A00.4")));
        assert!(!Icd10::is_truncatable(&normalized("A00")));
    }

    #[test]
    fn icd10_truncate_to_three_chars() {
        let truncated = Icd10::truncate(&normalized("A00.4"));
        assert_eq!(truncated.as_str(), "A00");
    }

    #[test]
    fn icd10_is_x_addable_for_three_char_codes() {
        assert!(Icd10::is_x_addable(&normalized("A00")));
    }

    #[test]
    fn icd10_add_x_appends_x() {
        assert_eq!(Icd10::add_x(&normalized("A00")).as_str(), "A00X");
    }

    proptest! {
        #[test]
        fn arbitrary_strings_match_regex_iff_validate_ok(s in "[A-Za-z0-9. X]{0,10}") {
            let c = Code::from(s.as_str());
            // Inputs that do not survive normalization are out of scope here.
            let Ok(n) = Icd10::normalize(&c) else { return Ok(()); };
            let regex_ok = ICD10_ORACLE.is_match(n.as_str()) && n.as_str().len() <= 7;
            let validate_ok = Icd10::validate_syntax(&n).is_ok();
            prop_assert_eq!(regex_ok, validate_ok);
        }

        #[test]
        fn valid_shape_icd10_validates_ok(s in valid_icd10()) {
            prop_assert!(Icd10::validate_syntax(&normalized(&s)).is_ok());
        }

        #[test]
        fn icd10_disallowed_chars_fail_invalid_contents(
            prefix in r"[A-Z][0-9]{2}",
            illegal in r"[!@#$%]",
        ) {
            let outcome = Icd10::validate_syntax(&normalized(&format!("{prefix}{illegal}")));
            // Bound to a local first so the pattern's braces are not passed
            // through `prop_assert!` directly.
            let is_invalid_contents =
                matches!(outcome, Err(ValidationError::InvalidContents { .. }));
            prop_assert!(is_invalid_contents);
        }

        #[test]
        fn icd10_out_of_range_length_fails_invalid_length(s in r"[A-Z0-9.X]{8,15}") {
            let outcome = Icd10::validate_syntax(&normalized(&s));
            let is_invalid_length =
                matches!(outcome, Err(ValidationError::InvalidLength { .. }));
            prop_assert!(is_invalid_length);
        }

        #[test]
        fn icd10_trim_idempotent(
            s in valid_icd10(),
            left in 0usize..5,
            right in 0usize..5,
        ) {
            let padded = format!("{}{s}{}", " ".repeat(left), " ".repeat(right));
            let base = normalized(&s);
            let pad = normalized(&padded);
            prop_assert_eq!(base.as_str(), pad.as_str());
        }
    }
}
3 changes: 3 additions & 0 deletions rust/codelist-systems-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@

pub mod capabilities;
pub mod core;
pub mod ctv3;
pub mod errors;
pub mod icd10;
pub mod opcs;
pub mod snomed;

pub use crate::{
capabilities::{Truncatable, XExtensible},
Expand Down
Loading
Loading