From 29d78486bb96a3b7824609a50e0c4cffd000caef Mon Sep 17 00:00:00 2001 From: cp-james-harbeck Date: Sun, 27 Aug 2023 14:41:51 -0400 Subject: [PATCH 1/3] Replace int based IDs with UUID. --- agentmemory/main.py | 56 +++++++++++++++++++------------------- agentmemory/postgres.py | 60 +++++++++++++++++++---------------------- 2 files changed, 55 insertions(+), 61 deletions(-) diff --git a/agentmemory/main.py b/agentmemory/main.py index 0217c43..815c6fa 100644 --- a/agentmemory/main.py +++ b/agentmemory/main.py @@ -10,7 +10,6 @@ get_include_types, ) - from agentmemory.client import get_client def create_memory(category, text, metadata={}, embedding=None, id=None): @@ -18,48 +17,49 @@ def create_memory(category, text, metadata={}, embedding=None, id=None): Create a new memory in a collection. Arguments: - category (str): Category of the collection. - text (str): Document text. - id (str): Unique id. - metadata (dict): Metadata. + - category (str): Category of the collection. + - text (str): Document text. + - metadata (dict): Metadata. + - embedding: Embedding vector, if available. + - id (str or None): Unique ID for the memory. If None, a UUID will be generated. Returns: - None + - The generated UUID or the provided ID. Example: >>> create_memory('sample_category', 'sample_text', id='sample_id', metadata={'sample_key': 'sample_value'}) """ - - # get or create the collection + + # Get or create the collection memories = get_client().get_or_create_collection(category) - # add timestamps to metadata + # Add timestamps to metadata metadata["created_at"] = datetime.datetime.now().timestamp() metadata["updated_at"] = datetime.datetime.now().timestamp() - # if no id is provided, generate one based on count of documents in collection - if id is None: - id = str(memories.count()) - # pad the id with zeros to make it 16 digits long - id = id.zfill(16) - - # for each field in metadata... - # if the field is a boolean, convert it to a string + # For each field in metadata, if the field is a boolean, convert it to a string for key, value in metadata.items(): if isinstance(value, bool) or isinstance(value, dict) or isinstance(value, list): - debug_log(f"WARNING: Boolean metadata field {key} converted to string") + debug_log(f"WARNING: Non-string metadata field {key} converted to string") metadata[key] = str(value) - # insert the document into the collection - memories.upsert( - ids=[str(id)], + # Prepare a list for the IDs + ids = [id] if id is not None else [] + + # Insert the document into the collection + memories.add( + ids=ids, documents=[text], metadatas=[metadata], - embeddings=[embedding] if embedding is not None else None, + embeddings=[embedding] if embedding is not None else None ) + + # Here, we assume that `add` method appends the newly generated ID to the `ids` list. + memory_id = ids[0] - debug_log(f"Created memory {id}: {text}", metadata) - return id + debug_log(f"Created memory {memory_id}: {text}", metadata) + + return memory_id # This will now be a UUID or the ID you provided def create_unique_memory(category, content, metadata={}, similarity=0.95): @@ -203,11 +203,11 @@ def get_memory(category, id, include_embeddings=True): Returns: dict: The retrieved memory. - - Example: - >>> get_memory("books", "1") """ + # No need to check UUID here as PostgreSQL will handle the type casting + debug_log(f"Getting memory with ID: {id}, Type: {type(id)}") + # Get or create the collection for the given category memories = get_client().get_or_create_collection(category) @@ -555,4 +555,4 @@ def wipe_all_memories(): for collection in collections: client.delete_collection(collection.name) - debug_log("Wiped all memories", type="system") + debug_log("Wiped all memories", type="system") \ No newline at end of file diff --git a/agentmemory/postgres.py b/agentmemory/postgres.py index 8e3f1ee..580a304 100644 --- a/agentmemory/postgres.py +++ b/agentmemory/postgres.py @@ -1,5 +1,6 @@ from pathlib import Path import psycopg2 +import uuid from agentmemory.check_model import check_model, infer_embeddings @@ -69,13 +70,13 @@ def count(self): def add(self, ids, documents=None, metadatas=None, embeddings=None): if embeddings is None: - for id_, document, metadata in zip(ids, documents, metadatas): - self.client.insert_memory(self.category, document, metadata) + for document, metadata in zip(documents, metadatas): + generated_id = self.client.insert_memory(self.category, document, metadata) + ids.append(generated_id) # appending the returned id to ids list else: - for id_, document, metadata, emb in zip( - ids, documents, metadatas, embeddings - ): - self.client.insert_memory(self.category, document, metadata, emb) + for document, metadata, emb in zip(documents, metadatas, embeddings): + generated_id = self.client.insert_memory(self.category, document, metadata, emb) + ids.append(generated_id) # appending the returned id to ids list def get( self, @@ -118,13 +119,10 @@ def get( self.client._ensure_metadata_columns_exist(category, parse_metadata(where)) if ids: - if not all(isinstance(i, str) or isinstance(i, int) for i in ids): - raise Exception( - "ids must be a list of integers or strings representing integers" - ) - ids = [int(i) for i in ids] - conditions.append("id=ANY(%s)") - params.append(ids) + # Type casting to uuid + conditions.append("id=ANY(%s::uuid[])") + params.append([str(id_) for id_ in ids]) + if limit is None: limit = 100 # or another default value @@ -221,13 +219,14 @@ def delete(self, ids=None, where=None, where_document=None): params.append(f"%{where_document}%") if ids: - if not all(isinstance(i, str) or isinstance(i, int) for i in ids): - raise Exception( - "ids must be a list of integers or strings representing integers" - ) - ids = [int(i) for i in ids] - conditions.append("id=ANY(%s::int[])") # Added explicit type casting - params.append(ids) + # Validate UUIDs + try: + ids = [uuid.UUID(str(i)) for i in ids] + except ValueError: + raise Exception("ids must be a list of valid UUIDs or strings that can be converted to UUIDs") + + conditions.append("id=ANY(%s::uuid[])") # Use uuid[] for PostgreSQL UUID array type + params.append([str(id_) for id_ in ids]) if where: for key, value in where.items(): @@ -262,7 +261,6 @@ def __init__(self, name): default_model_path = str(Path.home() / ".cache" / "onnx_models") - class PostgresClient: def __init__( self, @@ -286,11 +284,12 @@ def ensure_table_exists(self, category): self.cur.execute( f""" CREATE TABLE IF NOT EXISTS {table_name} ( - id SERIAL PRIMARY KEY, + id uuid DEFAULT uuid_generate_v4(), document TEXT NOT NULL, - embedding VECTOR(384) + embedding VECTOR(384), + PRIMARY KEY (id) ) - """ + """ ) self.connection.commit() @@ -343,14 +342,9 @@ def insert_memory(self, category, document, metadata={}, embedding=None, id=None if embedding is None: embedding = self.create_embedding(document) - # if the id is None, get the length of the table by counting the number of rows in the category - if id is None: - id = self.get_or_create_collection(category).count() - - # Extracting the keys and values from metadata to insert them into respective columns - columns = ["id", "document", "embedding"] + list(metadata.keys()) + columns = ["document", "embedding"] + list(metadata.keys()) placeholders = ["%s"] * len(columns) - values = [id, document, embedding] + list(metadata.values()) + values = [document, embedding] + list(metadata.values()) query = f""" INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({', '.join(placeholders)}) @@ -358,7 +352,7 @@ def insert_memory(self, category, document, metadata={}, embedding=None, id=None """ self.cur.execute(query, tuple(values)) self.connection.commit() - return self.cur.fetchone()[0] + return self.cur.fetchone()[0] # This will fetch the generated UUID def create_embedding(self, document): embeddings = infer_embeddings([document], model_path=self.model_path) @@ -495,4 +489,4 @@ def update(self, category, id_, document=None, metadata=None, embedding=None): def close(self): self.cur.close() - self.connection.close() + self.connection.close() \ No newline at end of file From ae9e3dfca9633f5255eddf82af5a1e147f2ef781 Mon Sep 17 00:00:00 2001 From: cp-james-harbeck Date: Sun, 27 Aug 2023 16:23:13 -0400 Subject: [PATCH 2/3] Replace int based IDs with UUID. --- agentmemory/main.py | 22 ++++++++++++++------ agentmemory/tests/events.py | 18 ++++++++++++++-- agentmemory/tests/main.py | 35 ++++++++++++++++++++++++-------- agentmemory/tests/persistence.py | 21 +++++++++++-------- 4 files changed, 71 insertions(+), 25 deletions(-) diff --git a/agentmemory/main.py b/agentmemory/main.py index 815c6fa..4a7b4ef 100644 --- a/agentmemory/main.py +++ b/agentmemory/main.py @@ -1,5 +1,6 @@ import datetime import os +import uuid os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -12,6 +13,8 @@ from agentmemory.client import get_client + + def create_memory(category, text, metadata={}, embedding=None, id=None): """ Create a new memory in a collection. @@ -29,7 +32,7 @@ def create_memory(category, text, metadata={}, embedding=None, id=None): Example: >>> create_memory('sample_category', 'sample_text', id='sample_id', metadata={'sample_key': 'sample_value'}) """ - + # Get or create the collection memories = get_client().get_or_create_collection(category) @@ -43,8 +46,12 @@ def create_memory(category, text, metadata={}, embedding=None, id=None): debug_log(f"WARNING: Non-string metadata field {key} converted to string") metadata[key] = str(value) + # If id is None, generate a UUID + if id is None: + id = str(uuid.uuid4()) + # Prepare a list for the IDs - ids = [id] if id is not None else [] + ids = [id] # Insert the document into the collection memories.add( @@ -53,12 +60,15 @@ def create_memory(category, text, metadata={}, embedding=None, id=None): metadatas=[metadata], embeddings=[embedding] if embedding is not None else None ) - - # Here, we assume that `add` method appends the newly generated ID to the `ids` list. - memory_id = ids[0] - debug_log(f"Created memory {memory_id}: {text}", metadata) + # Retrieve the ID of the newly created memory + memory_id = ids[0] if ids else None + + if memory_id is None: + debug_log("Failed to create memory. No ID returned.", type="error") + return None + debug_log(f"Created memory {memory_id}: {text}", metadata) return memory_id # This will now be a UUID or the ID you provided diff --git a/agentmemory/tests/events.py b/agentmemory/tests/events.py index c15e1ff..6208e4b 100644 --- a/agentmemory/tests/events.py +++ b/agentmemory/tests/events.py @@ -42,9 +42,23 @@ def test_create_event(): def test_get_events(): wipe_category("events") reset_epoch() + + # Create 5 events for i in range(5): - create_event("test event " + str(i + 1)) + create_event(f"test event {i + 1}") + + # Assert that 5 events are present assert len(get_events()) == 5 - assert get_events(1)[0]["document"] == "test event 5" + + # Fetch 1 event (or however your function's argument works) + events = get_events(1) + + # Create a list of all the documents in the retrieved events + retrieved_events = [event['document'] for event in events] + + # Assert that the document "test event 5" is in the retrieved events + assert "test event 5" in retrieved_events + + # Clean up wipe_category("events") wipe_category("epoch") diff --git a/agentmemory/tests/main.py b/agentmemory/tests/main.py index a87659a..350162e 100644 --- a/agentmemory/tests/main.py +++ b/agentmemory/tests/main.py @@ -16,21 +16,27 @@ def test_memory_creation_and_retrieval(): wipe_category("test") - # create 10 memories + + # Create 20 memories for i in range(20): create_memory( - "test", "document " + str(i), metadata={"test": "test", "test2": "test2"} + "test", f"document {i}", metadata={"test": "test", "test2": "test2"} ) + # Retrieve 10 memories with filtering memories = get_memories( "test", filter_metadata={"test": "test", "test2": "test2"}, n_results=10 ) - # assert length of memories is 10 + # Assert length of memories is 10 assert len(memories) == 10 - # assert that the first memory is document 19 - assert memories[0]["document"] == "document 19" + # Create a list of all the documents in the retrieved memories + retrieved_documents = [memory['document'] for memory in memories] + + # Assert that the document "document 19" is in the retrieved documents + assert "document 19" in retrieved_documents + wipe_category("test") @@ -168,21 +174,32 @@ def test_delete_similar_memories(): def test_create_unique_memory(): wipe_category("test") + # Test creating a novel memory create_unique_memory("test", "unique_memory_1", similarity=0.1) memories = get_memories("test") assert len(memories) == 1 - assert memories[0]["metadata"]["novel"] == "True" + + # Instead of relying on the order, filter the memories based on their content + novel_memories = [mem for mem in memories if mem['metadata']['novel'] == "True"] + assert len(novel_memories) == 1 # Test creating a non-novel memory similar to the existing one create_unique_memory("test", "unique_memory_1", similarity=0.1) memories = get_memories("test") assert len(memories) == 2 - assert memories[0]["metadata"]["novel"] == "False" + + # Filter non-novel memories + non_novel_memories = [mem for mem in memories if mem['metadata']['novel'] == "False"] + assert len(non_novel_memories) == 1 # Test creating a non-novel memory similar to the existing one create_unique_memory("test", "common_object_a", similarity=0.9) memories = get_memories("test") assert len(memories) == 3 - assert memories[0]["metadata"]["novel"] == "True" - wipe_category("test") \ No newline at end of file + + # Re-filter for novel memories + novel_memories = [mem for mem in memories if mem['metadata']['novel'] == "True"] + assert len(novel_memories) == 2 + + wipe_category("test") diff --git a/agentmemory/tests/persistence.py b/agentmemory/tests/persistence.py index d194b59..4f41e98 100644 --- a/agentmemory/tests/persistence.py +++ b/agentmemory/tests/persistence.py @@ -1,4 +1,5 @@ import os +import uuid from agentmemory import ( create_memory, @@ -13,7 +14,9 @@ def test_memory_export_import(): wipe_all_memories() - create_memory("test", "not document 1", metadata={"test": "test"}) + memory_id = create_memory("test", "not document 1", metadata={"test": "test"}) + assert isinstance(uuid.UUID(memory_id), uuid.UUID) # Validate it's a UUID + export_memory_to_file("./test_memories.json") import_file_to_memory("./test_memories.json") os.remove("./test_memories.json") @@ -23,7 +26,9 @@ def test_memory_export_import(): def test_export_memory_to_json(): - create_memory("test", "document 1", metadata={"test": "test"}) + memory_id = create_memory("test", "document 1", metadata={"test": "test"}) + assert isinstance(uuid.UUID(memory_id), uuid.UUID) # Validate it's a UUID + export_dict = export_memory_to_json() assert "test" in export_dict assert export_dict["test"][0]["document"] == "document 1" @@ -31,7 +36,7 @@ def test_export_memory_to_json(): def test_import_json_to_memory(): data = { - "test": [{"document": "document 1", "metadata": {"test": "test"}, "id": "1"}] + "test": [{"document": "document 1", "metadata": {"test": "test"}, "id": str(uuid.uuid4())}] # Generating a UUID } import_json_to_memory(data) test_memories = get_memories("test") @@ -39,13 +44,13 @@ def test_import_json_to_memory(): def test_import_file_to_memory(): - create_memory("test", "document 1", metadata={"test": "test"}) + memory_id = create_memory("test", "document 1", metadata={"test": "test"}) + assert isinstance(uuid.UUID(memory_id), uuid.UUID) # Validate it's a UUID + export_memory_to_file("./test_memories.json") - # Wipe out all memories wipe_all_memories() - # Now import from the file we exported import_file_to_memory("./test_memories.json") - # Remove the file after test os.remove("./test_memories.json") + test_memories = get_memories("test") - assert test_memories[0]["document"] == "document 1" + assert test_memories[0]["document"] == "document 1" \ No newline at end of file From a3a87a64a3d5fd9af2cb1f464a7b0813810c597d Mon Sep 17 00:00:00 2001 From: cp-james-harbeck Date: Sun, 27 Aug 2023 16:49:33 -0400 Subject: [PATCH 3/3] Revert "Replace int based IDs with UUID." This reverts commit ae9e3dfca9633f5255eddf82af5a1e147f2ef781. --- agentmemory/main.py | 22 ++++++-------------- agentmemory/tests/events.py | 18 ++-------------- agentmemory/tests/main.py | 35 ++++++++------------------------ agentmemory/tests/persistence.py | 21 ++++++++----------- 4 files changed, 25 insertions(+), 71 deletions(-) diff --git a/agentmemory/main.py b/agentmemory/main.py index 4a7b4ef..815c6fa 100644 --- a/agentmemory/main.py +++ b/agentmemory/main.py @@ -1,6 +1,5 @@ import datetime import os -import uuid os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -13,8 +12,6 @@ from agentmemory.client import get_client - - def create_memory(category, text, metadata={}, embedding=None, id=None): """ Create a new memory in a collection. @@ -32,7 +29,7 @@ def create_memory(category, text, metadata={}, embedding=None, id=None): Example: >>> create_memory('sample_category', 'sample_text', id='sample_id', metadata={'sample_key': 'sample_value'}) """ - + # Get or create the collection memories = get_client().get_or_create_collection(category) @@ -46,12 +43,8 @@ def create_memory(category, text, metadata={}, embedding=None, id=None): debug_log(f"WARNING: Non-string metadata field {key} converted to string") metadata[key] = str(value) - # If id is None, generate a UUID - if id is None: - id = str(uuid.uuid4()) - # Prepare a list for the IDs - ids = [id] + ids = [id] if id is not None else [] # Insert the document into the collection memories.add( @@ -60,15 +53,12 @@ def create_memory(category, text, metadata={}, embedding=None, id=None): metadatas=[metadata], embeddings=[embedding] if embedding is not None else None ) - - # Retrieve the ID of the newly created memory - memory_id = ids[0] if ids else None - - if memory_id is None: - debug_log("Failed to create memory. No ID returned.", type="error") - return None + + # Here, we assume that `add` method appends the newly generated ID to the `ids` list. + memory_id = ids[0] debug_log(f"Created memory {memory_id}: {text}", metadata) + return memory_id # This will now be a UUID or the ID you provided diff --git a/agentmemory/tests/events.py b/agentmemory/tests/events.py index 6208e4b..c15e1ff 100644 --- a/agentmemory/tests/events.py +++ b/agentmemory/tests/events.py @@ -42,23 +42,9 @@ def test_create_event(): def test_get_events(): wipe_category("events") reset_epoch() - - # Create 5 events for i in range(5): - create_event(f"test event {i + 1}") - - # Assert that 5 events are present + create_event("test event " + str(i + 1)) assert len(get_events()) == 5 - - # Fetch 1 event (or however your function's argument works) - events = get_events(1) - - # Create a list of all the documents in the retrieved events - retrieved_events = [event['document'] for event in events] - - # Assert that the document "test event 5" is in the retrieved events - assert "test event 5" in retrieved_events - - # Clean up + assert get_events(1)[0]["document"] == "test event 5" wipe_category("events") wipe_category("epoch") diff --git a/agentmemory/tests/main.py b/agentmemory/tests/main.py index 350162e..a87659a 100644 --- a/agentmemory/tests/main.py +++ b/agentmemory/tests/main.py @@ -16,27 +16,21 @@ def test_memory_creation_and_retrieval(): wipe_category("test") - - # Create 20 memories + # create 10 memories for i in range(20): create_memory( - "test", f"document {i}", metadata={"test": "test", "test2": "test2"} + "test", "document " + str(i), metadata={"test": "test", "test2": "test2"} ) - # Retrieve 10 memories with filtering memories = get_memories( "test", filter_metadata={"test": "test", "test2": "test2"}, n_results=10 ) - # Assert length of memories is 10 + # assert length of memories is 10 assert len(memories) == 10 - # Create a list of all the documents in the retrieved memories - retrieved_documents = [memory['document'] for memory in memories] - - # Assert that the document "document 19" is in the retrieved documents - assert "document 19" in retrieved_documents - + # assert that the first memory is document 19 + assert memories[0]["document"] == "document 19" wipe_category("test") @@ -174,32 +168,21 @@ def test_delete_similar_memories(): def test_create_unique_memory(): wipe_category("test") - # Test creating a novel memory create_unique_memory("test", "unique_memory_1", similarity=0.1) memories = get_memories("test") assert len(memories) == 1 - - # Instead of relying on the order, filter the memories based on their content - novel_memories = [mem for mem in memories if mem['metadata']['novel'] == "True"] - assert len(novel_memories) == 1 + assert memories[0]["metadata"]["novel"] == "True" # Test creating a non-novel memory similar to the existing one create_unique_memory("test", "unique_memory_1", similarity=0.1) memories = get_memories("test") assert len(memories) == 2 - - # Filter non-novel memories - non_novel_memories = [mem for mem in memories if mem['metadata']['novel'] == "False"] - assert len(non_novel_memories) == 1 + assert memories[0]["metadata"]["novel"] == "False" # Test creating a non-novel memory similar to the existing one create_unique_memory("test", "common_object_a", similarity=0.9) memories = get_memories("test") assert len(memories) == 3 - - # Re-filter for novel memories - novel_memories = [mem for mem in memories if mem['metadata']['novel'] == "True"] - assert len(novel_memories) == 2 - - wipe_category("test") + assert memories[0]["metadata"]["novel"] == "True" + wipe_category("test") \ No newline at end of file diff --git a/agentmemory/tests/persistence.py b/agentmemory/tests/persistence.py index 4f41e98..d194b59 100644 --- a/agentmemory/tests/persistence.py +++ b/agentmemory/tests/persistence.py @@ -1,5 +1,4 @@ import os -import uuid from agentmemory import ( create_memory, @@ -14,9 +13,7 @@ def test_memory_export_import(): wipe_all_memories() - memory_id = create_memory("test", "not document 1", metadata={"test": "test"}) - assert isinstance(uuid.UUID(memory_id), uuid.UUID) # Validate it's a UUID - + create_memory("test", "not document 1", metadata={"test": "test"}) export_memory_to_file("./test_memories.json") import_file_to_memory("./test_memories.json") os.remove("./test_memories.json") @@ -26,9 +23,7 @@ def test_memory_export_import(): def test_export_memory_to_json(): - memory_id = create_memory("test", "document 1", metadata={"test": "test"}) - assert isinstance(uuid.UUID(memory_id), uuid.UUID) # Validate it's a UUID - + create_memory("test", "document 1", metadata={"test": "test"}) export_dict = export_memory_to_json() assert "test" in export_dict assert export_dict["test"][0]["document"] == "document 1" @@ -36,7 +31,7 @@ def test_export_memory_to_json(): def test_import_json_to_memory(): data = { - "test": [{"document": "document 1", "metadata": {"test": "test"}, "id": str(uuid.uuid4())}] # Generating a UUID + "test": [{"document": "document 1", "metadata": {"test": "test"}, "id": "1"}] } import_json_to_memory(data) test_memories = get_memories("test") @@ -44,13 +39,13 @@ def test_import_json_to_memory(): def test_import_file_to_memory(): - memory_id = create_memory("test", "document 1", metadata={"test": "test"}) - assert isinstance(uuid.UUID(memory_id), uuid.UUID) # Validate it's a UUID - + create_memory("test", "document 1", metadata={"test": "test"}) export_memory_to_file("./test_memories.json") + # Wipe out all memories wipe_all_memories() + # Now import from the file we exported import_file_to_memory("./test_memories.json") + # Remove the file after test os.remove("./test_memories.json") - test_memories = get_memories("test") - assert test_memories[0]["document"] == "document 1" \ No newline at end of file + assert test_memories[0]["document"] == "document 1"