diff --git a/VERSION b/VERSION
index d48d3702..a04abec9 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.6.9
+2.6.10
diff --git a/src/requirements.txt b/src/requirements.txt
index 68b518e2..1449a271 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -1,7 +1,6 @@
 Flask==3.0.3
 neo4j==5.20.0
 prov==2.0.1
-Werkzeug==3.0.3
 deepdiff==7.0.1
 
 # For interacting with memcached
diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index 957b5d06..b8d6efda 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -54,3 +54,15 @@ class TriggerTypeEnum(Enum):
     BEFORE_UPDATE = 'before_update_trigger'
     AFTER_CREATE = 'after_create_trigger'
     AFTER_UPDATE = 'after_update_trigger'
+
+# Define an enumeration of accepted Neo4j relationship types.
+# Each value is used as the relationship type text of a Cypher query, so every value must
+# be unique: Enum turns a member with a repeated value into an alias of the first member.
+class Neo4jRelationshipEnum(Enum):
+    ACTIVITY_INPUT = 'ACTIVITY_INPUT'
+    ACTIVITY_OUTPUT = 'ACTIVITY_OUTPUT'
+    IN_COLLECTION = 'IN_COLLECTION'
+    IN_UPLOAD = 'IN_UPLOAD'
+    REVISION_OF = 'REVISION_OF'
+    USES_DATA = 'USES_DATA'
+
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index e3463947..a663e04d 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -1,5 +1,7 @@
+import neo4j
 from neo4j.exceptions import TransactionError
-from schema.schema_constants import SchemaConstants
+from neo4j import Session as Neo4jSession
+from schema.schema_constants import SchemaConstants, Neo4jRelationshipEnum
 import logging
 
 logger = logging.getLogger(__name__)
@@ -7,7 +9,6 @@
 # The filed name of the single result record
 record_field_name = 'result'
 
-
 ####################################################################################################
 ## Functions can be called by app.py, schema_manager.py, and schema_triggers.py
 ####################################################################################################
@@ -109,7 +110,38 @@ def get_entity(neo4j_driver, uuid):
 
     return result
 
+"""
+Given a list of UUIDs, return the UUIDs which identify existing Dataset entities.
+Only UUIDs present in Neo4j with an entity_type of 'Dataset' will be returned.
 
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+dataset_uuid_list : list of str
+    The uuids of target entities to look for in Neo4j
+
+Returns
+-------
+list of str
+    The uuid of each Dataset found by the Cypher query. The list is empty when
+    dataset_uuid_list is empty or none of its entries match a Dataset.
+"""
+def identify_existing_dataset_entities(neo4j_driver, dataset_uuid_list:list):
+
+    if not dataset_uuid_list:
+        return []
+
+    query = """
+        MATCH (e:Entity)
+        WHERE e.uuid IN $param_uuids
+        AND e.entity_type='Dataset'
+        RETURN e.uuid AS uuid
+    """
+
+    with neo4j_driver.session() as session:
+        results = session.run(query, param_uuids=dataset_uuid_list)
+        return [record["uuid"] for record in results]
 """
 Get the uuids for each entity in a list that doesn't belong to a certain entity type. Uuids are ordered by type
 
@@ -889,13 +921,11 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list
             _delete_collection_linkages_tx(tx=tx
                                            , uuid=collection_uuid)
 
-            # Create relationship from each member Dataset node to this Collection node
-            for dataset_uuid in dataset_uuid_list:
-                create_relationship_tx(tx=tx
-                                       , source_node_uuid=dataset_uuid
-                                       , direction='->'
-                                       , target_node_uuid=collection_uuid
-                                       , relationship='IN_COLLECTION')
+            _create_relationships_unwind_tx(tx=tx
+                                            , source_uuid_list=dataset_uuid_list
+                                            , target_uuid=collection_uuid
+                                            , relationship=Neo4jRelationshipEnum.IN_COLLECTION
+                                            , direction='->')
 
             tx.commit()
     except TransactionError as te:
@@ -1980,6 +2010,55 @@ def create_relationship_tx(tx, source_node_uuid, target_node_uuid, relationship,
     result = tx.run(query)
 
 
+"""
+Create multiple relationships between a target node and each node in
+a list of source nodes in neo4j
+
+Parameters
+----------
+tx : neo4j.Transaction object
+    The open neo4j.Transaction the query is run in; committing it is left to the caller
+source_uuid_list : list[str]
+    A list of UUIDs for nodes which will have a relationship to the node with target_uuid
+target_uuid : str
+    The UUID of target node
+relationship : Neo4jRelationshipEnum
+    The enumeration member whose value is the Neo4j relationship type between each source node and the target node.
+direction: str
+    The relationship direction of each source node to the target node: outgoing `->` or incoming `<-`
+    Neo4j CQL CREATE command supports only directional relationships
+
+Raises
+------
+ValueError
+    When direction is neither `->` nor `<-`
+"""
+def _create_relationships_unwind_tx(tx:neo4j.Transaction, source_uuid_list:list, target_uuid:str
+                                    , relationship:Neo4jRelationshipEnum, direction:str)->None:
+    logger.info("====== enter _create_relationships_unwind_tx() ======")
+    # Any other value would build an undirected CREATE pattern, so refuse it here
+    # rather than sending the database a query which cannot succeed.
+    if direction not in ("->", "<-"):
+        raise ValueError(f"Unsupported relationship direction '{direction}'; use '->' or '<-'.")
+    incoming = direction if direction == "<-" else "-"
+    outgoing = direction if direction == "->" else "-"
+
+    # The relationship type and the arrow are interpolated into the query text; the
+    # UUIDs are passed as query parameters.
+    query = (
+        f"MATCH (t {{uuid: $target_uuid}}) "
+        f"UNWIND $source_uuid_list AS src_uuid "
+        f"MATCH (s {{uuid: src_uuid}}) "
+        f"CREATE (s){incoming}[r:{relationship.value}]{outgoing}(t) "
+        f"RETURN src_uuid AS linked_uuid"
+    )
+
+    # The returned rows are not used, so the result is not bound to a name.
+    tx.run(query=query
+           , target_uuid=target_uuid
+           , source_uuid_list=source_uuid_list)
+    logger.info("====== returning from _create_relationships_unwind_tx() ======")
+
 """
 Execute one query to create all outgoing relationships from each node whose
 identifier is in the source node list to the target Activity node in neo4j
diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
index 078f319a..0ca4acb5 100644
--- a/src/schema/schema_validators.py
+++ b/src/schema/schema_validators.py
@@ -298,7 +298,8 @@ def verify_DOI_pair(property_key, normalized_entity_type, request, existing_data
                          f" the prefix {SchemaConstants.DOI_BASE_URL}.")
 
 """
-Validate every entity in a list is of entity_type accepted
+Validate every entity in a list is of entity_type that can be in a
+Collection and already exists in Neo4j.
 
 Parameters
 ----------
@@ -318,41 +319,20 @@ def collection_entities_are_existing_datasets(property_key, normalized_entity_ty
-    # Verify each UUID specified exists in the uuid-api, exists in Neo4j, and is for a Dataset before
+    # Verify each UUID specified exists in Neo4j and is for a Dataset before
     # proceeding with creation of Collection.
-    bad_dataset_uuids = []
-    for dataset_uuid in new_data_dict['dataset_uuids']:
-        try:
-            ## The following code duplicates some functionality existing in app.py, in
-            ## query_target_entity(), which also deals with caching. In the future, the
-            ## validation logic shared by this file and app.py should become a utility
-            ## module, shared by validators as well as app.py. But for now, the code
-            ## is repeated for the following.
-
-            # Get cached ids if exist otherwise retrieve from UUID-API. Expect an
-            # Exception to be raised if not found.
-            dataset_uuid_entity = schema_manager.get_hubmap_ids(id=dataset_uuid)
-
-            # If the uuid exists per the uuid-api, make sure it also exists as a Neo4j entity.
-            uuid = dataset_uuid_entity['uuid']
-            entity_dict = schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance(), dataset_uuid)
-
-            # If dataset_uuid is not found in Neo4j or is not for a Dataset, fail the validation.
-            if not entity_dict:
-                logger.info(f"Request for {dataset_uuid} inclusion in Collection,"
-                            f" but not found in Neo4j.")
-                bad_dataset_uuids.append(dataset_uuid)
-            elif entity_dict['entity_type'] != 'Dataset':
-                logger.info(f"Request for {dataset_uuid} inclusion in Collection,"
-                            f" but entity_type={entity_dict['entity_type']}, not Dataset.")
-                bad_dataset_uuids.append(dataset_uuid)
-        except Exception as nfe:
-            # If the dataset_uuid is not found, fail the validation.
-            logger.info(f"Request for {dataset_uuid} inclusion in Collection"
-                        f" failed uuid-api retrieval.")
-            bad_dataset_uuids.append(dataset_uuid)
-    # If any uuids in the request dataset_uuids are not for an existing Dataset entity which
-    # exists in uuid-api and Neo4j, raise an Exception so the validation fails and the
-    # operation can be rejected.
-    if bad_dataset_uuids:
-        raise ValueError(f"Unable to find Datasets for {bad_dataset_uuids}.")
+    dataset_uuid_list = new_data_dict['dataset_uuids']
+    # Nothing to verify when no Datasets are requested.
+    if not dataset_uuid_list:
+        return
+
+    existing_datasets_list = schema_neo4j_queries.identify_existing_dataset_entities(neo4j_driver=schema_manager.get_neo4j_driver_instance()
+                                                                                      , dataset_uuid_list=dataset_uuid_list)
+
+    # If any UUIDs which were passed in do not exist in Neo4j or are not Datasets, identify them
+    missing_uuid_set = set(dataset_uuid_list) - set(existing_datasets_list)
+    if missing_uuid_set:
+        logger.info(f"Only existing Datasets may be included in a Collection:"
+                    f" {sorted(missing_uuid_set)}")
+        raise ValueError(f"Only existing Datasets may be included in a Collection, not these: "
+                         f" {sorted(missing_uuid_set)}")
 
 """
 Validate the provided value of Dataset.status on update via PUT