From 3afced09cd1729d3d94935339740c1e971ff2311 Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Mon, 15 Dec 2025 08:44:31 -0500
Subject: [PATCH 1/6] Switch loops over Neo4j calls to single call for
 entity-api#974

---
 src/schema/schema_constants.py     | 10 +++++
 src/schema/schema_neo4j_queries.py | 66 ++++++++++++++++++++++++++----
 src/schema/schema_validators.py    | 62 ++++++++++++----------------
 3 files changed, 94 insertions(+), 44 deletions(-)

diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index 957b5d06..7bbc7c14 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -54,3 +54,13 @@ class TriggerTypeEnum(Enum):
     BEFORE_UPDATE = 'before_update_trigger'
     AFTER_CREATE = 'after_create_trigger'
     AFTER_UPDATE = 'after_update_trigger'
+
+# Define an enumeration of accepted Neo4j relationship types.
+class Neo4jRelationshipEnum(Enum):
+    ACTIVITY_INPUT = 'ACTIVITY_INPUT'
+    ACTIVITY_OUTPUT = 'ACTIVITY_INPUT'
+    IN_COLLECTION = 'IN_COLLECTION'
+    N_UPLOAD = 'N_UPLOAD'
+    REVISION_OF = 'REVISION_OF'
+    USES_DATA = 'USES_DATA'
+
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index e3463947..d30e19d9 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -1,5 +1,7 @@
+import neo4j
 from neo4j.exceptions import TransactionError
-from schema.schema_constants import SchemaConstants
+from neo4j import Session as Neo4jSession
+from schema.schema_constants import SchemaConstants, Neo4jRelationshipEnum
 import logging
 
 logger = logging.getLogger(__name__)
@@ -7,7 +9,6 @@
 # The filed name of the single result record
 record_field_name = 'result'
 
-
 ####################################################################################################
 ## Functions can be called by app.py, schema_manager.py, and schema_triggers.py
 ####################################################################################################
@@ -109,7 +110,37 @@ def get_entity(neo4j_driver, uuid):
 
     return result
 
+"""
+Given a list of UUIDs, return a dict mapping uuid -> entity_node
+Only UUIDs present in Neo4j will be returned.
+
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+uuid_list : list of str
+    The uuids of target entities to retrieve from Neo4j
+
+Returns
+-------
+dict
+    A dictionary of entity details returned from the Cypher query, keyed by
+    the uuid provided in uuid_list.
+"""
+def get_entities(neo4j_driver, uuid_list):
+
+    if not uuid_list:
+        return {}
+
+    query = """
+        MATCH (e:Entity)
+        WHERE e.uuid IN $param_uuids
+        RETURN e.uuid AS uuid, e AS entity
+    """
 
+    with neo4j_driver.session() as session:
+        results = session.run(query, param_uuids=uuid_list)
+        return {record["uuid"]: record["entity"] for record in results}
 
 """
 Get the uuids for each entity in a list that doesn't belong to a certain entity type. Uuids are ordered by type
@@ -889,13 +920,11 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list
             _delete_collection_linkages_tx(tx=tx
                                            , uuid=collection_uuid)
 
-            # Create relationship from each member Dataset node to this Collection node
-            for dataset_uuid in dataset_uuid_list:
-                create_relationship_tx(tx=tx
-                                       , source_node_uuid=dataset_uuid
-                                       , direction='->'
-                                       , target_node_uuid=collection_uuid
-                                       , relationship='IN_COLLECTION')
+            _create_relationships_unwind_tx(tx=tx
+                                            , source_uuid_list=dataset_uuid_list
+                                            , target_uuid=collection_uuid
+                                            , relationship=Neo4jRelationshipEnum.IN_COLLECTION
+                                            , direction='->')
 
             tx.commit()
     except TransactionError as te:
@@ -1980,6 +2009,25 @@ def create_relationship_tx(tx, source_node_uuid, target_node_uuid, relationship,
 
     result = tx.run(query)
 
+def _create_relationships_unwind_tx(tx:Neo4jSession, source_uuid_list:list, target_uuid:str
+                                   , relationship:Neo4jRelationshipEnum, direction:str)->None:
+    logger.info("====== enter _create_relationships_unwind_tx() ======")
+    incoming = direction if direction == "<-" else "-"
+    outgoing = direction if direction == "->" else "-"
+
+    query = (
+        f"MATCH (t {{uuid: $target_uuid}}) "
+        f"UNWIND $source_uuid_list AS src_uuid "
+        f"MATCH (s {{uuid: src_uuid}}) "
+        f"CREATE (s){incoming}[r:{relationship.value}]{outgoing}(t) "
+        f"RETURN src_uuid AS linked_uuid"
+    )
+
+    result = tx.run(  query=query
+                    , target_uuid=target_uuid
+                    , source_uuid_list=source_uuid_list)
+    logger.info("====== returning from _create_relationships_unwind_tx() ======")
+
 """
 Execute one query to create all outgoing relationships from each node whose
 identifier is in the source node list to the target Activity node in neo4j
diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
index 078f319a..bc9eef19 100644
--- a/src/schema/schema_validators.py
+++ b/src/schema/schema_validators.py
@@ -298,7 +298,8 @@ def verify_DOI_pair(property_key, normalized_entity_type, request, existing_data
                             f" the prefix {SchemaConstants.DOI_BASE_URL}.")
 
 """
-Validate every entity in a list is of entity_type accepted
+Validate that every entity in a list is a Dataset, the only entity_type
+accepted in a Collection here, and already exists in Neo4j.
 
 Parameters
 ----------
@@ -318,41 +319,32 @@ def collection_entities_are_existing_datasets(property_key, normalized_entity_ty
     # Verify each UUID specified exists in the uuid-api, exists in Neo4j, and is for a Dataset before
     # proceeding with creation of Collection.
     bad_dataset_uuids = []
-    for dataset_uuid in new_data_dict['dataset_uuids']:
-        try:
-            ## The following code duplicates some functionality existing in app.py, in
-            ## query_target_entity(), which also deals with caching. In the future, the
-            ## validation logic shared by this file and app.py should become a utility
-            ## module, shared by validators as well as app.py.  But for now, the code
-            ## is repeated for the following.
-
-            # Get cached ids if exist otherwise retrieve from UUID-API. Expect an
-            # Exception to be raised if not found.
-            dataset_uuid_entity = schema_manager.get_hubmap_ids(id=dataset_uuid)
-
-            # If the uuid exists per the uuid-api, make sure it also exists as a Neo4j entity.
-            uuid = dataset_uuid_entity['uuid']
-            entity_dict = schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance(), dataset_uuid)
-
-            # If dataset_uuid is not found in Neo4j or is not for a Dataset, fail the validation.
-            if not entity_dict:
-                logger.info(f"Request for {dataset_uuid} inclusion in Collection,"
-                            f" but not found in Neo4j.")
-                bad_dataset_uuids.append(dataset_uuid)
-            elif entity_dict['entity_type'] != 'Dataset':
-                logger.info(f"Request for {dataset_uuid} inclusion in Collection,"
-                            f" but entity_type={entity_dict['entity_type']}, not Dataset.")
-                bad_dataset_uuids.append(dataset_uuid)
-        except Exception as nfe:
-            # If the dataset_uuid is not found, fail the validation.
-            logger.info(f"Request for {dataset_uuid} inclusion in Collection"
-                        f" failed uuid-api retrieval.")
-            bad_dataset_uuids.append(dataset_uuid)
+    dataset_uuid_list = new_data_dict['dataset_uuids']
+    if not dataset_uuid_list:
+        return
+
+    existing_uuid_entities = schema_neo4j_queries.get_entities( neo4j_driver=schema_manager.get_neo4j_driver_instance()
+                                                                , uuid_list=dataset_uuid_list)
+
+    # If any UUIDs which were passed in do not exist in Neo4j or are not Datasets, identify them
+    missing_uuid_set = set(dataset_uuid_list) - set(existing_uuid_entities)
+    if missing_uuid_set:
+        logger.info(f"Request for inclusion in Collection but not found in Neo4j:"
+                    f" {sorted(missing_uuid_set)}")
+
+    non_dataset_uuid_set = set()
+    for uuid, neo4j_entity in existing_uuid_entities.items():
+        if neo4j_entity['entity_type'] != 'Dataset':
+            non_dataset_uuid_set.add(uuid)
+    if non_dataset_uuid_set:
+        logger.info(f"Request for inclusion in Collection, but non-Dataset entities in Neo4j:"
+                    f" {sorted(non_dataset_uuid_set)}")
+
     # If any uuids in the request dataset_uuids are not for an existing Dataset entity which
-    # exists in uuid-api and Neo4j, raise an Exception so the validation fails and the
-    # operation can be rejected.
-    if bad_dataset_uuids:
-        raise ValueError(f"Unable to find Datasets for {bad_dataset_uuids}.")
+    # exists in Neo4j, raise an Exception so the validation fails and the operation can be rejected.
+    if missing_uuid_set or non_dataset_uuid_set:
+        raise ValueError(f"Unable to find Datasets for"
+                            f" {sorted(missing_uuid_set.union(non_dataset_uuid_set))}")
 
 """
 Validate the provided value of Dataset.status on update via PUT

From 1cff0da3b7644c99ac9451d0349a95f00b7f93b1 Mon Sep 17 00:00:00 2001
From: yuanzhou <yuanzhou19@gmail.com>
Date: Mon, 15 Dec 2025 13:58:54 -0500
Subject: [PATCH 2/6] Bump version from 2.6.9 to 2.6.10

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index d48d3702..a04abec9 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.6.9
+2.6.10

From 57dbb4476b0afd5725aeb994f84a4f86b072e9d5 Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Mon, 15 Dec 2025 14:56:06 -0500
Subject: [PATCH 3/6] Correct enum constant identified by code review

---
 src/schema/schema_constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index 7bbc7c14..b8d6efda 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -60,7 +60,7 @@ class Neo4jRelationshipEnum(Enum):
     ACTIVITY_INPUT = 'ACTIVITY_INPUT'
     ACTIVITY_OUTPUT = 'ACTIVITY_INPUT'
     IN_COLLECTION = 'IN_COLLECTION'
-    N_UPLOAD = 'N_UPLOAD'
+    IN_UPLOAD = 'IN_UPLOAD'
     REVISION_OF = 'REVISION_OF'
     USES_DATA = 'USES_DATA'
 

From 8eaa9dee185bfee8c2d1a832e6aa87625c230207 Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Mon, 15 Dec 2025 15:10:00 -0500
Subject: [PATCH 4/6] Add documentation for new
 _create_relationships_unwind_tx() method, requested during code review.

---
 src/schema/schema_neo4j_queries.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index d30e19d9..804f9b45 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -2009,6 +2009,24 @@ def create_relationship_tx(tx, source_node_uuid, target_node_uuid, relationship,
 
     result = tx.run(query)
 
+"""
+Create multiple relationships between a target node and each node in
+a list of source nodes in neo4j
+
+Parameters
+----------
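+tx : neo4j transaction object (presumably neo4j.Transaction; the signature annotates it as neo4j.Session)
+    The open transaction the query is run on; this function does not commit it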
+source_uuid_list : list[str]
+    A list of UUIDs for nodes which will have a relationship to the node with target_uuid
+target_uuid : str
+    The UUID of target node
+relationship : Neo4jRelationshipEnum
+    The enum member whose value is the Neo4j relationship type created between each source node and the target node.
+direction : str
+    The relationship direction from each source node to the target node: outgoing `->` or incoming `<-`.
+    Any other value yields an undirected pattern, and the Neo4j Cypher CREATE command supports only directional relationships.
+"""
 def _create_relationships_unwind_tx(tx:Neo4jSession, source_uuid_list:list, target_uuid:str
                                    , relationship:Neo4jRelationshipEnum, direction:str)->None:
     logger.info("====== enter _create_relationships_unwind_tx() ======")

From d4aed5ebe87710b23815f248512be9de0a374735 Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Mon, 15 Dec 2025 16:17:38 -0500
Subject: [PATCH 5/6] Eliminate looping over result sets and providing specific
 error msgs

---
 src/schema/schema_neo4j_queries.py | 11 ++++++-----
 src/schema/schema_validators.py    | 24 ++++++------------------
 2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index 804f9b45..6ab089b8 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -127,20 +127,21 @@ def get_entity(neo4j_driver, uuid):
     A dictionary of entity details returned from the Cypher query, keyed by
     the uuid provided in uuid_list.
 """
-def get_entities(neo4j_driver, uuid_list):
+def get_existing_dataset_entities(neo4j_driver, dataset_uuid_list:list):
 
-    if not uuid_list:
+    if not dataset_uuid_list:
         return {}
 
     query = """
         MATCH (e:Entity)
         WHERE e.uuid IN $param_uuids
-        RETURN e.uuid AS uuid, e AS entity
+          AND e.entity_type='Dataset'
+        RETURN  e.uuid AS uuid
     """
 
     with neo4j_driver.session() as session:
-        results = session.run(query, param_uuids=uuid_list)
-        return {record["uuid"]: record["entity"] for record in results}
+        results = session.run(query, param_uuids=dataset_uuid_list)
+        return [record["uuid"] for record in results]
 
 """
 Get the uuids for each entity in a list that doesn't belong to a certain entity type. Uuids are ordered by type
diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
index bc9eef19..1cd1e735 100644
--- a/src/schema/schema_validators.py
+++ b/src/schema/schema_validators.py
@@ -323,28 +323,16 @@ def collection_entities_are_existing_datasets(property_key, normalized_entity_ty
     if not dataset_uuid_list:
         return
 
-    existing_uuid_entities = schema_neo4j_queries.get_entities( neo4j_driver=schema_manager.get_neo4j_driver_instance()
-                                                                , uuid_list=dataset_uuid_list)
+    existing_datasets_list = schema_neo4j_queries.get_existing_dataset_entities(    neo4j_driver=schema_manager.get_neo4j_driver_instance()
+                                                                                    , dataset_uuid_list=dataset_uuid_list)
 
     # If any UUIDs which were passed in do not exist in Neo4j or are not Datasets, identify them
-    missing_uuid_set = set(dataset_uuid_list) - set(existing_uuid_entities)
+    missing_uuid_set = set(dataset_uuid_list) - set(existing_datasets_list)
     if missing_uuid_set:
-        logger.info(f"Request for inclusion in Collection but not found in Neo4j:"
+        logger.info(f"Only existing Datasets may be included in a Collection:"
                     f" {sorted(missing_uuid_set)}")
-
-    non_dataset_uuid_set = set()
-    for uuid, neo4j_entity in existing_uuid_entities.items():
-        if neo4j_entity['entity_type'] != 'Dataset':
-            non_dataset_uuid_set.add(uuid)
-    if non_dataset_uuid_set:
-        logger.info(f"Request for inclusion in Collection, but non-Dataset entities in Neo4j:"
-                    f" {sorted(non_dataset_uuid_set)}")
-
-    # If any uuids in the request dataset_uuids are not for an existing Dataset entity which
-    # exists in Neo4j, raise an Exception so the validation fails and the operation can be rejected.
-    if missing_uuid_set or non_dataset_uuid_set:
-        raise ValueError(f"Unable to find Datasets for"
-                            f" {sorted(missing_uuid_set.union(non_dataset_uuid_set))}")
+        raise ValueError(   f"Only existing Datasets may be included in a Collection, not these: "
+                            f" {sorted(missing_uuid_set)}")
 
 """
 Validate the provided value of Dataset.status on update via PUT

From ab8b63a590d1304bf3b2639d370b79b627a0ea81 Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Mon, 15 Dec 2025 16:29:07 -0500
Subject: [PATCH 6/6] Rename function to align with limited function

---
 src/schema/schema_neo4j_queries.py | 2 +-
 src/schema/schema_validators.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index 6ab089b8..a663e04d 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -127,7 +127,7 @@ def get_entity(neo4j_driver, uuid):
     A dictionary of entity details returned from the Cypher query, keyed by
     the uuid provided in uuid_list.
 """
-def get_existing_dataset_entities(neo4j_driver, dataset_uuid_list:list):
+def identify_existing_dataset_entities(neo4j_driver, dataset_uuid_list:list):
 
     if not dataset_uuid_list:
         return {}
diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
index 1cd1e735..0ca4acb5 100644
--- a/src/schema/schema_validators.py
+++ b/src/schema/schema_validators.py
@@ -323,8 +323,8 @@ def collection_entities_are_existing_datasets(property_key, normalized_entity_ty
     if not dataset_uuid_list:
         return
 
-    existing_datasets_list = schema_neo4j_queries.get_existing_dataset_entities(    neo4j_driver=schema_manager.get_neo4j_driver_instance()
-                                                                                    , dataset_uuid_list=dataset_uuid_list)
+    existing_datasets_list = schema_neo4j_queries.identify_existing_dataset_entities(   neo4j_driver=schema_manager.get_neo4j_driver_instance()
+                                                                                        , dataset_uuid_list=dataset_uuid_list)
 
     # If any UUIDs which were passed in do not exist in Neo4j or are not Datasets, identify them
     missing_uuid_set = set(dataset_uuid_list) - set(existing_datasets_list)