diff --git a/openviking/storage/queuefs/semantic_processor.py b/openviking/storage/queuefs/semantic_processor.py
index 7e300666..2dcd7c2d 100644
--- a/openviking/storage/queuefs/semantic_processor.py
+++ b/openviking/storage/queuefs/semantic_processor.py
@@ -187,9 +187,44 @@ async def _generate_single_file_summary(self, file_path: str) -> Dict[str, str]:
 
         try:
             # Read file content (limit length)
-            content = await viking_fs.read_file(file_path)
-            if isinstance(content, bytes):
-                content = content.decode("utf-8")
+            raw_content = await viking_fs.read_file(file_path)
+
+            if isinstance(raw_content, bytes):
+                try:
+                    content = raw_content.decode("utf-8")
+                except UnicodeDecodeError:
+                    # Not valid UTF-8: treat as binary (image, video, ...) and fall back
+                    # to a placeholder whenever the VLM cannot describe it.
+                    placeholder = {"name": file_name, "summary": f"[Binary file: {file_name}]"}
+                    if not vlm.is_available():
+                        return placeholder
+
+                    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".webp")
+                    if file_name.lower().endswith(image_suffixes):
+                        prompt = "Describe this image in detail for a file summary."
+                    else:
+                        prompt = "Please describe this file content briefly for summary."
+
+                    try:
+                        # Resolve the instance once so the capability check and the
+                        # call are made against the same object.
+                        vlm_instance = vlm.get_vlm_instance()
+                        describe = getattr(vlm_instance, "get_vision_completion_async", None)
+                        if describe is None:
+                            logger.warning("VLM instance does not support vision completion")
+                            return placeholder
+                        summary = (await describe(prompt, [raw_content]) or "").strip()
+                    except Exception as vlm_err:
+                        # Best-effort: a VLM failure must not fail the whole summary.
+                        logger.warning(
+                            f"VLM vision completion failed for {file_name}: {vlm_err}"
+                        )
+                        return placeholder
+
+                    # An empty description is no better than the placeholder.
+                    return {"name": file_name, "summary": summary} if summary else placeholder
+            else:
+                content = raw_content
 
             # Limit content length (about 10000 tokens)
             max_chars = 30000
diff --git a/tests/storage/test_semantic_processor_vlm.py b/tests/storage/test_semantic_processor_vlm.py
new file mode 100644
index 00000000..a1869521
--- /dev/null
+++ b/tests/storage/test_semantic_processor_vlm.py
@@ -0,0 +1,70 @@
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from openviking.storage.queuefs.semantic_processor import SemanticProcessor
+
+
+@pytest.mark.asyncio
+async def test_semantic_processor_image_description():
+    """Test that image files trigger VLM vision capabilities"""
+    processor = SemanticProcessor()
+
+    # Mock VikingFS to return bytes that are not valid utf-8 (simulating image binary)
+    mock_fs = MagicMock()
+    mock_fs.read_file = AsyncMock(return_value=b"\xff\xff\xff\xff")
+
+    # Mock VLM Config
+    mock_vlm_config = MagicMock()
+    mock_vlm_config.is_available.return_value = True
+
+    # Mock VLM Instance with vision capability
+    mock_vlm_instance = MagicMock()
+    mock_vlm_instance.get_vision_completion_async = AsyncMock(
+        return_value="A detailed description of the image."
+    )
+    mock_vlm_config.get_vlm_instance.return_value = mock_vlm_instance
+
+    with (
+        patch("openviking.storage.queuefs.semantic_processor.get_viking_fs", return_value=mock_fs),
+        patch(
+            "openviking.storage.queuefs.semantic_processor.get_openviking_config"
+        ) as mock_get_config,
+    ):
+        mock_get_config.return_value.vlm = mock_vlm_config
+
+        # Test with png extension to trigger vision path
+        result = await processor._generate_single_file_summary("test_image.png")
+
+        assert result["name"] == "test_image.png"
+        assert result["summary"] == "A detailed description of the image."
+
+        # Verify vision method was called
+        mock_vlm_instance.get_vision_completion_async.assert_called_once()
+        # Verify the prompt is vision-specific
+        args = mock_vlm_instance.get_vision_completion_async.call_args
+        assert "image" in args[0][0].lower()
+
+
+@pytest.mark.asyncio
+async def test_semantic_processor_binary_file_fallback():
+    """Test fallback when VLM is not available for binary files"""
+    processor = SemanticProcessor()
+
+    mock_fs = MagicMock()
+    mock_fs.read_file = AsyncMock(return_value=b"\xff\xff")
+
+    mock_vlm_config = MagicMock()
+    mock_vlm_config.is_available.return_value = False
+
+    with (
+        patch("openviking.storage.queuefs.semantic_processor.get_viking_fs", return_value=mock_fs),
+        patch(
+            "openviking.storage.queuefs.semantic_processor.get_openviking_config"
+        ) as mock_get_config,
+    ):
+        mock_get_config.return_value.vlm = mock_vlm_config
+
+        result = await processor._generate_single_file_summary("unknown_file.bin")
+
+        assert "[Binary file: unknown_file.bin]" in result["summary"]