From 467e6c37540982071d82d37f18fd2dbd097162ea Mon Sep 17 00:00:00 2001 From: CarolineMorton Date: Fri, 24 Apr 2026 13:37:31 +0100 Subject: [PATCH 1/6] Add .claude/ to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 5fe4768..9d30c81 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,9 @@ Cargo.lock .DS_Store todo.txt +# Claude Code +.claude/ + ## Python ## __pycache__/ venv/ From 19f6e644812083f14f226597e752a986ba2dbb09 Mon Sep 17 00:00:00 2001 From: CarolineMorton Date: Fri, 24 Apr 2026 13:44:46 +0100 Subject: [PATCH 2/6] cargo fmt --- .../src/snomed_usage_data.rs | 56 ++++++++++++------- .../tests/download_usage.rs | 3 +- .../src/ctv3_validator.rs | 3 +- .../src/icd10_validator.rs | 3 +- .../src/opcs_validator.rs | 3 +- rust/codelist-validator-rs/src/validator.rs | 11 ++-- 6 files changed, 50 insertions(+), 29 deletions(-) diff --git a/rust/codelist-builder-rs/src/snomed_usage_data.rs b/rust/codelist-builder-rs/src/snomed_usage_data.rs index 6acf5a3..69e96dc 100644 --- a/rust/codelist-builder-rs/src/snomed_usage_data.rs +++ b/rust/codelist-builder-rs/src/snomed_usage_data.rs @@ -4,47 +4,65 @@ //! Components of the file: //! //! SNOMED_Concept_ID: -//! SNOMED concepts which have been added to a patient record in a general practice system during the reporting period. +//! SNOMED concepts which have been added to a patient record in a general +//! practice system during the reporting period. //! //! Description: -//! The fully specified name associated with the SNOMED_Concept_ID on the final day of the reporting period (31 July). +//! The fully specified name associated with the SNOMED_Concept_ID on the final +//! day of the reporting period (31 July). //! //! Usage: -//! The number of times that the SNOMED_Concept_ID was added into any patient record within the reporting period, rounded to the nearerst 10. Usage of 1 to 4 is displayed as *. SNOMED concepts with no code usage are not included. +//! The number of times that the SNOMED_Concept_ID was added into any patient +//! record within the reporting period, rounded to the nearerst 10. Usage of 1 +//! to 4 is displayed as *. SNOMED concepts with no code usage are not included. //! Important notes: -//! - Data prior to 2019 was originally submitted mostly in READ V2 or CTV3, but in the usage files, these codes have been mapped to corresponding SNOMED codes using final 2020 version of the mapping tables published by NHS England. -//! - The usage does not show how many patients had each code added to their record - each addition regardless of whether it is the same patient increments the count by 1. Therefore it is not possible to infer the number of individual patients with a particular code. -//! - For the 2011-12 to 2017-18 data, it is stated that "Current maximum value is approximately 250,000,000" - no such maximum is stated for the 2018-19 onwards data. +//! - Data prior to 2019 was originally submitted mostly in READ V2 or CTV3, but +//! in the usage files, these codes have been mapped to corresponding SNOMED +//! codes using final 2020 version of the mapping tables published by NHS +//! England. +//! - The usage does not show how many patients had each code added to their +//! record - each addition regardless of whether it is the same patient +//! increments the count by 1. Therefore it is not possible to infer the +//! number of individual patients with a particular code. +//! - For the 2011-12 to 2017-18 data, it is stated that "Current maximum value +//! is approximately 250,000,000" - no such maximum is stated for the 2018-19 +//! onwards data. //! //! Active_at_Start: -//! Active status of the SNOMED_Concept_ID on the first day of the reporting period. This is taken from the most recent UK clinical extension, or associated International extention, which was published up to the start of the reporting year (1 August). -//! 1 = SNOMED concept was published and was active. -//! 0 = SNOMED concept was either not yet available or was inactive. +//! Active status of the SNOMED_Concept_ID on the first day of the reporting +//! period. This is taken from the most recent UK clinical extension, or +//! associated International extention, which was published up to the start of +//! the reporting year (1 August). 1 = SNOMED concept was published and was +//! active. 0 = SNOMED concept was either not yet available or was inactive. //! //! Active_at_End: -//! Active status of the SNOMED_Concept_ID on the last day of the reporting period. This is taken from the most recent UK clinical extension, or associated International extention, which was published up to the end of the reporting year (31 July). -//! 1 = SNOMED concept was published and was active. +//! Active status of the SNOMED_Concept_ID on the last day of the reporting +//! period. This is taken from the most recent UK clinical extension, or +//! associated International extention, which was published up to the end of the +//! reporting year (31 July). 1 = SNOMED concept was published and was active. //! 0 = SNOMED concept was either not yet available or was inactive. -use std::fs; - -// Internal imports -use crate::errors::CodeListBuilderError; +use std::{collections::HashMap, fs}; // External imports use csv; use reqwest; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; + +// Internal imports +use crate::errors::CodeListBuilderError; /// Struct to represent a snomed usage data entry /// /// # Fields /// * `snomed_concept_id` - The snomed concept id /// * `description` - The description -/// * `usage` - The usage. A count of 1-4 is denoted by a *. Counts above 4 are denoted by a number rounded to the nearest 10. -/// * `active_at_start` - Whether the concept was active at the start of the usage period -/// * `active_at_end` - Whether the concept was active at the end of the usage period +/// * `usage` - The usage. A count of 1-4 is denoted by a *. Counts above 4 are +/// denoted by a number rounded to the nearest 10. +/// * `active_at_start` - Whether the concept was active at the start of the +/// usage period +/// * `active_at_end` - Whether the concept was active at the end of the usage +/// period #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct SnomedUsageDataEntry { pub snomed_concept_id: String, diff --git a/rust/codelist-builder-rs/tests/download_usage.rs b/rust/codelist-builder-rs/tests/download_usage.rs index fec99e0..2171d93 100644 --- a/rust/codelist-builder-rs/tests/download_usage.rs +++ b/rust/codelist-builder-rs/tests/download_usage.rs @@ -1,5 +1,4 @@ -use codelist_builder_rs::errors::CodeListBuilderError; -use codelist_builder_rs::snomed_usage_data::SnomedUsageData; +use codelist_builder_rs::{errors::CodeListBuilderError, snomed_usage_data::SnomedUsageData}; #[tokio::test] async fn test_download_usage() -> Result<(), CodeListBuilderError> { diff --git a/rust/codelist-validator-rs/src/ctv3_validator.rs b/rust/codelist-validator-rs/src/ctv3_validator.rs index 0f9c80c..2556217 100644 --- a/rust/codelist-validator-rs/src/ctv3_validator.rs +++ b/rust/codelist-validator-rs/src/ctv3_validator.rs @@ -3,7 +3,8 @@ //! Validation Rules //! 1. The code must be exactly 5 characters in length. //! 2. Only alphanumeric characters (a-z, A-Z, 0-9) and dots (.) are allowed. -//! 3. The code starts with 0-5 alphanumeric characters followed by dots to pad to 5 characters. +//! 3. The code starts with 0-5 alphanumeric characters followed by dots to pad +//! to 5 characters. use std::sync::LazyLock; use codelist_rs::codelist::CodeList; diff --git a/rust/codelist-validator-rs/src/icd10_validator.rs b/rust/codelist-validator-rs/src/icd10_validator.rs index f7d6b95..93f3b38 100644 --- a/rust/codelist-validator-rs/src/icd10_validator.rs +++ b/rust/codelist-validator-rs/src/icd10_validator.rs @@ -5,7 +5,8 @@ //! 2. The first character must be a letter. //! 3. The second and third characters must be numbers. //! 4. The fourth character must be a dot, or a number or X. -//! 5. If the fourth character is a dot, there must be at least 1 number after the dot. +//! 5. If the fourth character is a dot, there must be at least 1 number after +//! the dot. //! 6. If the fourth character is a X, there are no further characters. //! 7. The fifth to seventh characters must be numbers if present. use std::sync::LazyLock; diff --git a/rust/codelist-validator-rs/src/opcs_validator.rs b/rust/codelist-validator-rs/src/opcs_validator.rs index 71989dc..679a8c6 100644 --- a/rust/codelist-validator-rs/src/opcs_validator.rs +++ b/rust/codelist-validator-rs/src/opcs_validator.rs @@ -4,7 +4,8 @@ //! 1. The code must be 3-5 characters long. //! 2. The first character must be a letter. //! 3. The second and third characters must be numbers. -//! 4. If there is a fourth character and it is a dot, there must be a number after the dot. +//! 4. If there is a fourth character and it is a dot, there must be a number +//! after the dot. //! 5. The fifth character, if present, is a number. use std::sync::LazyLock; diff --git a/rust/codelist-validator-rs/src/validator.rs b/rust/codelist-validator-rs/src/validator.rs index 8b6f8ac..3939812 100644 --- a/rust/codelist-validator-rs/src/validator.rs +++ b/rust/codelist-validator-rs/src/validator.rs @@ -1,7 +1,6 @@ //! Generic trait for validating a codelist -use regex::Regex; - use codelist_rs::{codelist::CodeList, types::CodeListType}; +use regex::Regex; use crate::{ ctv3_validator::Ctv3Validator, errors::CodeListValidatorError, icd10_validator::IcdValidator, @@ -43,7 +42,8 @@ impl Validator for CodeList { /// * `regex` - The regex to use to validate the codes /// /// # Returns -/// * `Result<(), CodeListValidatorError>` - Ok(()) if all codes match the custom regex pattern, Err(CodeListValidatorError) otherwise +/// * `Result<(), CodeListValidatorError>` - Ok(()) if all codes match the +/// custom regex pattern, Err(CodeListValidatorError) otherwise fn custom_validate_all_code(codelist: &CodeList, re: &Regex) -> Result<(), CodeListValidatorError> { let mut reasons = Vec::new(); for (code, _) in codelist.entries.iter() { @@ -68,15 +68,16 @@ fn custom_validate_all_code(codelist: &CodeList, re: &Regex) -> Result<(), CodeL #[cfg(test)] mod tests { + use std::sync::LazyLock; + use codelist_rs::{ codelist::CodeList, codelist_options::CodeListOptions, errors::CodeListError, metadata::Metadata, types::CodeListType, }; + use regex::Regex; use super::*; use crate::validator::Validator; - use regex::Regex; - use std::sync::LazyLock; static TEST_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"^B\d{2}$").expect("Failed to compile test regex")); From 8505ecc5046a6d67f4f2dccb996db334da6fcce7 Mon Sep 17 00:00:00 2001 From: CarolineMorton Date: Fri, 24 Apr 2026 13:58:18 +0100 Subject: [PATCH 3/6] Fix test-python: use maturin develop so tests see current Rust code maturin build produces a wheel but does not install it into the active venv, so tests were running against whatever was last manually installed. Use maturin develop instead, which builds and installs into the active venv. Requires sourcing bindings/python/.venv/bin/activate before just ci. --- Justfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Justfile b/Justfile index d077e85..7683ffd 100644 --- a/Justfile +++ b/Justfile @@ -21,11 +21,12 @@ prettier-check: # CI task: check formatting and linting and all tests ci: fmt-check clippy prettier-check test-python test-rust -# Run python tests +# Run python tests. +# Requires an active Python venv (run `source bindings/python/.venv/bin/activate` +# first). `maturin develop` builds the extension and installs it into the +# active venv so the tests exercise the current Rust code, not a stale wheel. test-python: - echo "Build python library" - maturin build --manifest-path bindings/python/Cargo.toml - echo "Run python tests" + maturin develop --manifest-path bindings/python/Cargo.toml sh bindings/python/tests/run.sh # Run rust tests From 54e1c32faa9d568296e87a8fdfab84d38564a3a0 Mon Sep 17 00:00:00 2001 From: CarolineMorton Date: Fri, 24 Apr 2026 14:22:48 +0100 Subject: [PATCH 4/6] Add Code newtype to types module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newtype wraps String with compile-time distinction only — no content validation at this layer. serde(transparent) keeps the wire format as a plain JSON string. Canonical conversions via From, From<&str>, and From for String. --- rust/codelist-rs/src/types.rs | 89 ++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 6 deletions(-) diff --git a/rust/codelist-rs/src/types.rs b/rust/codelist-rs/src/types.rs index c69a298..9dda39c 100644 --- a/rust/codelist-rs/src/types.rs +++ b/rust/codelist-rs/src/types.rs @@ -1,13 +1,64 @@ -//! This file defines the different types of codelists that can be used +//! Public types for the codelist crate. +//! +//! Includes: +//! - [`CodeListType`] — the runtime tag identifying which coding system a +//! [`crate::codelist::CodeList`] belongs to. +//! - Newtype wrappers such as [`Code`] that give compile-time type safety to +//! values that would otherwise just be `String`. +//! +//! The newtypes do NOT implement `Deref`. Callers convert +//! explicitly with `as_str()` (or via `String::from(x)` when they need an +//! owned `String`). This keeps "is this a `Term` or a `Code`?" answerable +//! at compile time. -/// External imports -use std::str::FromStr; +use std::{fmt, str::FromStr}; use serde::{Deserialize, Serialize}; -/// Internal imports use crate::errors::CodeListError; +/// A raw clinical code as given by a caller, before any coding-system +/// specific normalisation. +/// +/// Construction is infallible — `Code` carries no content invariants +/// beyond "it is a `String` that a caller intended as a code". Content +/// validation happens later, against the relevant `CodingSystem` (see the +/// `codelist-systems-rs` crate) or at the point it's added to a `CodeList`. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct Code(String); + +impl Code { + /// Borrow the inner string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl From for Code { + fn from(s: String) -> Self { + Self(s) + } +} + +impl From<&str> for Code { + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl From for String { + fn from(c: Code) -> Self { + c.0 + } +} + +impl fmt::Display for Code { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + /// Enum to represent the different types of codelists /// /// # Variants @@ -63,8 +114,6 @@ impl FromStr for CodeListType { } } -use std::fmt; - /// Implement `Display` for `CodeListType` so it automatically supports /// `to_string()` impl fmt::Display for CodeListType { @@ -83,6 +132,34 @@ impl fmt::Display for CodeListType { mod tests { use super::*; + #[test] + fn code_round_trips_string_borrow_and_owned() { + let c = Code::from("A54".to_string()); + assert_eq!(c.as_str(), "A54"); + assert_eq!(String::from(c), "A54"); + } + + #[test] + fn code_from_str_and_string_are_equivalent() { + assert_eq!(Code::from("A54"), Code::from("A54".to_string())); + assert_eq!(Code::from("A54").as_str(), "A54"); + assert_eq!(Code::from("A54".to_string()).as_str(), "A54"); + } + + #[test] + fn code_serialises_transparently_as_json_string() { + let c = Code::from("A54"); + let json = serde_json::to_string(&c).unwrap(); + assert_eq!(json, "\"A54\""); + let back: Code = serde_json::from_str("\"A54\"").unwrap(); + assert_eq!(back, c); + } + + #[test] + fn code_displays_as_inner() { + assert_eq!(Code::from("A54").to_string(), "A54"); + } + #[test] fn test_from_str() { assert!(matches!(CodeListType::from_str("icd10"), Ok(CodeListType::ICD10))); From 2cf92469d1a523c194021f8fde97df16686f6155 Mon Sep 17 00:00:00 2001 From: CarolineMorton Date: Fri, 24 Apr 2026 14:30:51 +0100 Subject: [PATCH 5/6] Add NormalizedCode, Term, CodeSystemId, Contributor, CodeListId newtypes --- rust/codelist-rs/src/types.rs | 232 ++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) diff --git a/rust/codelist-rs/src/types.rs b/rust/codelist-rs/src/types.rs index 9dda39c..329783a 100644 --- a/rust/codelist-rs/src/types.rs +++ b/rust/codelist-rs/src/types.rs @@ -59,6 +59,173 @@ impl fmt::Display for Code { } } +/// A code after a `CodingSystem` has normalised it (e.g. uppercased, +/// trimmed). Semantically distinct from [`Code`]: the system guarantees +/// this value has been through `CodingSystem::normalize`. Construct via +/// `From` inside system impls; elsewhere, obtain one by calling +/// the system's normalize function. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct NormalizedCode(String); + +impl NormalizedCode { + /// Borrow the inner string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl From for NormalizedCode { + fn from(s: String) -> Self { + Self(s) + } +} + +impl From<&str> for NormalizedCode { + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl From for String { + fn from(n: NormalizedCode) -> Self { + n.0 + } +} + +impl fmt::Display for NormalizedCode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + +/// A human-readable label associated with a [`Code`] (e.g. "Gonorrhoea"). +/// Kept separate from `Code` so a function signature like +/// `fn add(code: Code, term: Term)` is unambiguous at the call site — +/// the two cannot be accidentally transposed. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct Term(String); + +impl Term { + /// Borrow the inner string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl From for Term { + fn from(s: String) -> Self { + Self(s) + } +} + +impl From<&str> for Term { + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl From for String { + fn from(t: Term) -> Self { + t.0 + } +} + +impl fmt::Display for Term { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + +/// Identifier for a coding system (ICD10, SNOMED, ...). Uses `&'static str` +/// so each system can expose a `const ID: CodeSystemId` on its +/// `CodingSystem` impl. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct CodeSystemId(pub &'static str); + +impl fmt::Display for CodeSystemId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.0) + } +} + +/// Someone who contributed to a codelist (e.g. a name or handle). +/// Distinct from an arbitrary `String` so contributor collections cannot +/// accidentally mix with other string-typed metadata fields. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct Contributor(String); + +impl Contributor { + /// Borrow the inner string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl From for Contributor { + fn from(s: String) -> Self { + Self(s) + } +} + +impl From<&str> for Contributor { + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl From for String { + fn from(c: Contributor) -> Self { + c.0 + } +} + +impl fmt::Display for Contributor { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + +/// Stable identifier for a codelist. Distinct from an arbitrary `String` +/// so cross-codelist references (e.g. merge records) are typed rather +/// than stringly-typed. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct CodeListId(String); + +impl CodeListId { + /// Borrow the inner string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl From for CodeListId { + fn from(s: String) -> Self { + Self(s) + } +} + +impl From<&str> for CodeListId { + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl From for String { + fn from(id: CodeListId) -> Self { + id.0 + } +} + +impl fmt::Display for CodeListId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + /// Enum to represent the different types of codelists /// /// # Variants @@ -185,4 +352,69 @@ mod tests { assert_eq!(CodeListType::OPCS.to_string(), "OPCS"); assert_eq!(CodeListType::CTV3.to_string(), "CTV3"); } + + #[test] + fn normalized_code_round_trips() { + let n = NormalizedCode::from("A54"); + assert_eq!(n.as_str(), "A54"); + assert_eq!(String::from(n), "A54"); + } + + #[test] + fn normalized_code_serialises_transparently_as_json_string() { + let n = NormalizedCode::from("A54"); + assert_eq!(serde_json::to_string(&n).unwrap(), "\"A54\""); + let back: NormalizedCode = serde_json::from_str("\"A54\"").unwrap(); + assert_eq!(back, n); + } + + #[test] + fn term_round_trips() { + let t = Term::from("Gonorrhoea"); + assert_eq!(t.as_str(), "Gonorrhoea"); + assert_eq!(String::from(t), "Gonorrhoea"); + } + + #[test] + fn term_serialises_transparently_as_json_string() { + let t = Term::from("Gonorrhoea"); + assert_eq!(serde_json::to_string(&t).unwrap(), "\"Gonorrhoea\""); + let back: Term = serde_json::from_str("\"Gonorrhoea\"").unwrap(); + assert_eq!(back, t); + } + + #[test] + fn code_system_id_displays() { + assert_eq!(CodeSystemId("ICD10").to_string(), "ICD10"); + } + + #[test] + fn contributor_round_trips() { + let c = Contributor::from("Caroline Morton"); + assert_eq!(c.as_str(), "Caroline Morton"); + assert_eq!(String::from(c), "Caroline Morton"); + } + + #[test] + fn contributor_serialises_transparently_as_json_string() { + let c = Contributor::from("Caroline Morton"); + assert_eq!(serde_json::to_string(&c).unwrap(), "\"Caroline Morton\""); + let back: Contributor = serde_json::from_str("\"Caroline Morton\"").unwrap(); + assert_eq!(back, c); + } + + #[test] + fn codelist_id_round_trips() { + let id = CodeListId::from("my-codelist-123"); + assert_eq!(id.as_str(), "my-codelist-123"); + assert_eq!(String::from(id), "my-codelist-123"); + } + + #[test] + fn codelist_id_serialises_transparently_as_json_string() { + let id = CodeListId::from("my-codelist-123"); + assert_eq!(serde_json::to_string(&id).unwrap(), "\"my-codelist-123\""); + let back: CodeListId = serde_json::from_str("\"my-codelist-123\"").unwrap(); + assert_eq!(back, id); + } } From ca488aa60cc08799a41f00c0184d7572b2284ed2 Mon Sep 17 00:00:00 2001 From: CarolineMorton Date: Fri, 24 Apr 2026 14:59:53 +0100 Subject: [PATCH 6/6] Migrate Provenance.contributors to IndexSet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeListError::ContributorNotFound now carries a Contributor (not String), so remove_contributor takes Contributor by value and moves it into the error on the not-found path — no as_str().to_string() dance. Python binding's add_contributor, remove_contributor, and the Provenance::new call site in PyCodeList::new wrap incoming strings with Contributor::from at the FFI boundary. Python-facing signatures remain String; the binding exposes contributors as native strings via as_str(). --- bindings/python/src/codelist.rs | 12 ++-- rust/codelist-rs/src/errors.rs | 4 +- rust/codelist-rs/src/metadata/metadata.rs | 10 +-- rust/codelist-rs/src/metadata/provenance.rs | 67 +++++++++++++-------- 4 files changed, 57 insertions(+), 36 deletions(-) diff --git a/bindings/python/src/codelist.rs b/bindings/python/src/codelist.rs index 5be82ac..f325574 100755 --- a/bindings/python/src/codelist.rs +++ b/bindings/python/src/codelist.rs @@ -10,7 +10,7 @@ use codelist_rs::{ CategorisationAndUsage, Metadata, Provenance, PurposeAndContext, Source, ValidationAndReview, }, - types::CodeListType, + types::{CodeListType, Contributor}, }; use codelist_validator_rs::validator::Validator; use indexmap::IndexSet; @@ -63,7 +63,9 @@ impl PyCodeList { })?; // convert authors vec to IndexSet let authors_set = authors - .map(|authors| authors.into_iter().collect::>()) + .map(|authors| { + authors.into_iter().map(Contributor::from).collect::>() + }) .unwrap_or_default(); let provenance = Provenance::new(source, Some(authors_set)); let categorisation_and_usage = CategorisationAndUsage::new(None, None, None); @@ -114,7 +116,7 @@ impl PyCodeList { /// Add a contributor to the codelist's provenance fn add_contributor(&mut self, contributor: String) -> PyResult<()> { - self.inner.metadata.provenance.add_contributor(contributor); + self.inner.metadata.provenance.add_contributor(Contributor::from(contributor)); Ok(()) } @@ -123,7 +125,7 @@ impl PyCodeList { self.inner .metadata .provenance - .remove_contributor(contributor) + .remove_contributor(Contributor::from(contributor)) .map_err(|e| PyValueError::new_err(e.to_string()))?; Ok(()) } @@ -132,7 +134,7 @@ impl PyCodeList { fn contributors(&self, py: Python) -> PyResult { let py_set = PySet::new(py, &[] as &[String])?; for contributor in &self.inner.metadata.provenance.contributors { - py_set.add(contributor)?; + py_set.add(contributor.as_str())?; } Ok(py_set.into()) } diff --git a/rust/codelist-rs/src/errors.rs b/rust/codelist-rs/src/errors.rs index 25c4f40..e085a18 100644 --- a/rust/codelist-rs/src/errors.rs +++ b/rust/codelist-rs/src/errors.rs @@ -5,6 +5,8 @@ use std::io; use csv; use serde_json; +use crate::types::Contributor; + /// Enum to represent the different types of errors that can occur in the /// codelist library @@ -53,7 +55,7 @@ pub enum CodeListError { CodeEntryTermDoesNotExist { code: String, msg: String }, #[error("Contributor {contributor} not found")] - ContributorNotFound { contributor: String }, + ContributorNotFound { contributor: Contributor }, #[error("Invalid metadata source: {source_string}")] InvalidMetadataSource { source_string: String }, diff --git a/rust/codelist-rs/src/metadata/metadata.rs b/rust/codelist-rs/src/metadata/metadata.rs index a49c294..40496b6 100644 --- a/rust/codelist-rs/src/metadata/metadata.rs +++ b/rust/codelist-rs/src/metadata/metadata.rs @@ -58,7 +58,7 @@ mod tests { use indexmap::IndexSet; use super::*; - use crate::{errors::CodeListError, metadata::Source}; + use crate::{errors::CodeListError, metadata::Source, types::Contributor}; // helper function to get the time difference between the current time and the // given date @@ -69,8 +69,10 @@ mod tests { #[test] fn test_new() -> Result<(), CodeListError> { - let provenance = - Provenance::new(Source::ManuallyCreated, Some(IndexSet::from(["Test".to_string()]))); + let provenance = Provenance::new( + Source::ManuallyCreated, + Some(IndexSet::from([Contributor::from("Test")])), + ); let categorisation_and_usage = CategorisationAndUsage::new( Some(HashSet::from(["tag1".to_string()])), Some(HashSet::from(["usage1".to_string()])), @@ -100,7 +102,7 @@ mod tests { assert!(time_difference < 1000); let time_difference = get_time_difference(metadata.provenance.last_modified_date); assert!(time_difference < 1000); - assert_eq!(metadata.provenance.contributors, IndexSet::from(["Test".to_string()])); + assert_eq!(metadata.provenance.contributors, IndexSet::from([Contributor::from("Test")])); assert_eq!(metadata.categorisation_and_usage.tags, HashSet::from(["tag1".to_string()])); assert_eq!(metadata.categorisation_and_usage.usage, HashSet::from(["usage1".to_string()])); diff --git a/rust/codelist-rs/src/metadata/provenance.rs b/rust/codelist-rs/src/metadata/provenance.rs index c1aafbb..5e0e77c 100644 --- a/rust/codelist-rs/src/metadata/provenance.rs +++ b/rust/codelist-rs/src/metadata/provenance.rs @@ -10,14 +10,14 @@ use serde::{Deserialize, Serialize}; // Internal imports use crate::errors::CodeListError; -use crate::metadata::metadata_source::Source; +use crate::{metadata::metadata_source::Source, types::Contributor}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Provenance { pub source: Source, pub created_date: chrono::DateTime, pub last_modified_date: chrono::DateTime, - pub contributors: IndexSet, + pub contributors: IndexSet, } impl Default for Provenance { @@ -31,7 +31,7 @@ impl Provenance { /// /// # Arguments /// * `source` - The source of the codelist - pub fn new(source: Source, contributors: Option>) -> Provenance { + pub fn new(source: Source, contributors: Option>) -> Provenance { Self { source, created_date: Utc::now(), @@ -53,7 +53,7 @@ impl Provenance { /// # Arguments /// * `self` - The provenance to update /// * `contributor` - The contributor to add - pub fn add_contributor(&mut self, contributor: String) { + pub fn add_contributor(&mut self, contributor: Contributor) { self.contributors.insert(contributor); } @@ -62,7 +62,7 @@ impl Provenance { /// # Arguments /// * `self` - The provenance to update /// * `contributor` - The contributor to remove - pub fn remove_contributor(&mut self, contributor: String) -> Result<(), CodeListError> { + pub fn remove_contributor(&mut self, contributor: Contributor) -> Result<(), CodeListError> { if self.contributors.shift_remove(&contributor) { Ok(()) } else { @@ -88,7 +88,7 @@ mod tests { fn create_test_provenance_with_contributors() -> Provenance { let mut contributors = IndexSet::new(); - contributors.insert("Example Contributor".to_string()); + contributors.insert(Contributor::from("Example Contributor")); Provenance::new(Source::LoadedFromFile, Some(contributors)) } @@ -100,14 +100,17 @@ mod tests { assert!(time_difference < 1000); let time_difference = get_time_difference(provenance.last_modified_date); assert!(time_difference < 1000); - assert_eq!(provenance.contributors, IndexSet::new()); + assert_eq!(provenance.contributors, IndexSet::::new()); } #[test] fn test_new_provenance_with_contributors() { let provenance = create_test_provenance_with_contributors(); assert_eq!(provenance.source, Source::LoadedFromFile); - assert_eq!(provenance.contributors, IndexSet::from(["Example Contributor".to_string()])); + assert_eq!( + provenance.contributors, + IndexSet::from([Contributor::from("Example Contributor")]) + ); let time_difference = get_time_difference(provenance.created_date); assert!(time_difference < 1000); let time_difference = get_time_difference(provenance.last_modified_date); @@ -125,23 +128,35 @@ mod tests { #[test] fn test_add_contributor() { let mut provenance = create_test_provenance_no_contributors(); - provenance.add_contributor("Example Contributor".to_string()); - assert_eq!(provenance.contributors, IndexSet::from(["Example Contributor".to_string()])); + provenance.add_contributor(Contributor::from("Example Contributor")); + assert_eq!( + provenance.contributors, + IndexSet::from([Contributor::from("Example Contributor")]) + ); + } + + #[test] + fn add_contributor_uses_newtype() { + let mut p = create_test_provenance_no_contributors(); + let c = Contributor::from("Caroline"); + p.add_contributor(c.clone()); + assert!(p.contributors.contains(&c)); } #[test] fn test_remove_contributor() -> Result<(), CodeListError> { let mut provenance = create_test_provenance_with_contributors(); - provenance.add_contributor("Example Contributor".to_string()); - provenance.remove_contributor("Example Contributor".to_string())?; - assert_eq!(provenance.contributors, IndexSet::new()); + provenance.add_contributor(Contributor::from("Example Contributor")); + provenance.remove_contributor(Contributor::from("Example Contributor"))?; + assert_eq!(provenance.contributors, IndexSet::::new()); Ok(()) } #[test] fn test_remove_contributor_not_found() { let mut provenance = create_test_provenance_no_contributors(); - let error = provenance.remove_contributor("Example Contributor".to_string()).unwrap_err(); + let error = + provenance.remove_contributor(Contributor::from("Example Contributor")).unwrap_err(); let error_string = error.to_string(); assert_eq!(error_string, "Contributor Example Contributor not found"); } @@ -150,35 +165,35 @@ mod tests { fn test_contributors_order_is_maintained() -> Result<(), CodeListError> { let mut provenance = create_test_provenance_no_contributors(); - provenance.add_contributor("Example1".to_string()); + provenance.add_contributor(Contributor::from("Example1")); { let mut iter = provenance.contributors.iter(); - assert_eq!(iter.next(), Some(&"Example1".to_string())); + assert_eq!(iter.next(), Some(&Contributor::from("Example1"))); assert_eq!(iter.next(), None); } - provenance.add_contributor("Example2".to_string()); + provenance.add_contributor(Contributor::from("Example2")); { let mut iter = provenance.contributors.iter(); - assert_eq!(iter.next(), Some(&"Example1".to_string())); - assert_eq!(iter.next(), Some(&"Example2".to_string())); + assert_eq!(iter.next(), Some(&Contributor::from("Example1"))); + assert_eq!(iter.next(), Some(&Contributor::from("Example2"))); assert_eq!(iter.next(), None); } - provenance.add_contributor("Example3".to_string()); + provenance.add_contributor(Contributor::from("Example3")); { let mut iter = provenance.contributors.iter(); - assert_eq!(iter.next(), Some(&"Example1".to_string())); - assert_eq!(iter.next(), Some(&"Example2".to_string())); - assert_eq!(iter.next(), Some(&"Example3".to_string())); + assert_eq!(iter.next(), Some(&Contributor::from("Example1"))); + assert_eq!(iter.next(), Some(&Contributor::from("Example2"))); + assert_eq!(iter.next(), Some(&Contributor::from("Example3"))); assert_eq!(iter.next(), None); } - provenance.remove_contributor("Example2".to_string())?; + provenance.remove_contributor(Contributor::from("Example2"))?; { let mut iter = provenance.contributors.iter(); - assert_eq!(iter.next(), Some(&"Example1".to_string())); - assert_eq!(iter.next(), Some(&"Example3".to_string())); + assert_eq!(iter.next(), Some(&Contributor::from("Example1"))); + assert_eq!(iter.next(), Some(&Contributor::from("Example3"))); assert_eq!(iter.next(), None); }