diff --git a/demos/integration_with_OpenWebUI/README.md b/demos/integration_with_OpenWebUI/README.md index 38420b27de..0763037f83 100644 --- a/demos/integration_with_OpenWebUI/README.md +++ b/demos/integration_with_OpenWebUI/README.md @@ -30,8 +30,8 @@ This demo was tested on CPU but most of the models could be also run on Intel ac :sync: Windows ```bat mkdir models -ovms.exe --pull --source_model Godreign/llama-3.2-3b-instruct-openvino-int4-model --model_repository_path models --task text_generation -ovms.exe --add_to_config --config_path models\config.json --model_path Godreign\llama-3.2-3b-instruct-openvino-int4-model --model_name Godreign/llama-3.2-3b-instruct-openvino-int4-model +ovms.exe --pull --source_model OpenVINO/gpt-oss-20b-int4-ov --model_repository_path models --tool_parser gptoss --reasoning_parser gptoss --task text_generation +ovms.exe --add_to_config --config_path models\config.json --model_path OpenVINO\gpt-oss-20b-int4-ov --model_name ovms-model ovms.exe --rest_port 8000 --config_path models\config.json ``` ::: @@ -39,8 +39,8 @@ ovms.exe --rest_port 8000 --config_path models\config.json :sync: Linux ```bash mkdir models -docker run --rm -u $(id -u):$(id -g) -v $PWD/models:/models openvino/model_server:weekly --pull --source_model Godreign/llama-3.2-3b-instruct-openvino-int4-model --model_repository_path /models --task text_generation -docker run --rm -u $(id -u):$(id -g) -v $PWD/models:/models openvino/model_server:weekly --add_to_config --config_path /models/config.json --model_path Godreign/llama-3.2-3b-instruct-openvino-int4-model --model_name Godreign/llama-3.2-3b-instruct-openvino-int4-model +docker run --rm -u $(id -u):$(id -g) -v $PWD/models:/models openvino/model_server:weekly --pull --source_model OpenVINO/gpt-oss-20b-int4-ov --model_repository_path /models --task text_generation --tool_parser gptoss --reasoning_parser gptoss +docker run --rm -u $(id -u):$(id -g) -v $PWD/models:/models openvino/model_server:weekly --add_to_config 
--config_path /models/config.json --model_path OpenVINO/gpt-oss-20b-int4-ov --model_name ovms-model docker run -d -u $(id -u):$(id -g) -v $PWD/models:/models -p 8000:8000 openvino/model_server:weekly --rest_port 8000 --config_path /models/config.json ``` ::: @@ -49,7 +49,7 @@ docker run -d -u $(id -u):$(id -g) -v $PWD/models:/models -p 8000:8000 openvino/ Here is the basic call to check if it works: ```console -curl http://localhost:8000/v3/chat/completions -H "Content-Type: application/json" -d "{\"model\":\"Godreign/llama-3.2-3b-instruct-openvino-int4-model\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a helpful assistant.\"},{\"role\":\"user\",\"content\":\"Say this is a test\"}]}" +curl http://localhost:8000/v3/chat/completions -H "Content-Type: application/json" -d "{\"model\":\"ovms-model\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a helpful assistant.\"},{\"role\":\"user\",\"content\":\"Say this is a test\"}]}" ``` ## Step 2: Install and start OpenWebUI @@ -86,7 +86,7 @@ Go to [http://localhost:8080](http://localhost:8080) and create admin account to 1. Go to **Admin Panel** → **Settings** → **Connections** ([http://localhost:8080/admin/settings/connections](http://localhost:8080/admin/settings/connections)) 2. Click **+Add Connection** under **OpenAI API** * URL: `http://localhost:8000/v3` - * Model IDs: put `Godreign/llama-3.2-3b-instruct-openvino-int4-model` and click **+** to add the model, or leave empty to include all models + * Model IDs: put `ovms-model` and click **+** to add the model, or leave empty to include all models 3. Click **Save** ![connection setting](./connection_setting.png) @@ -278,21 +278,21 @@ Method 2: ### Step 1: Model Preparation -The vision language model used in this demo is [OpenVINO/InternVL2-2B-int4-ov](https://huggingface.co/OpenVINO/InternVL2-2B-int4-ov). 
Run the ovms with --pull parameter to download and quantize the model: +The vision language model used in this demo is `Junrui2021/Qwen3-VL-8B-Instruct-int4`. Run the ovms with --pull parameter to download and quantize the model: ::::{tab-set} :::{tab-item} Windows :sync: Windows ```bat -ovms.exe --pull --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenVINO/InternVL2-2B-int4-ov --task text_generation -ovms.exe --add_to_config --config_path models\config.json --model_path OpenVINO\InternVL2-2B-int4-ov --model_name OpenVINO/InternVL2-2B-int4-ov +ovms.exe --pull --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --model_repository_path models --model_name ovms-model-vl --task text_generation --pipeline_type VLM_CB +ovms.exe --add_to_config --config_path models\config.json --model_path models\Junrui2021\Qwen3-VL-8B-Instruct-int4 --model_name ovms-model-vl ``` ::: :::{tab-item} Linux (using Docker) :sync: Linux ```bash -docker run --rm -u $(id -u):$(id -g) -v $PWD/models:/models openvino/model_server:weekly --pull --source_model OpenVINO/InternVL2-2B-int4-ov --model_repository_path models --model_name OpenVINO/InternVL2-2B-int4-ov --task text_generation -docker run --rm -u $(id -u):$(id -g) -v $PWD/models:/models openvino/model_server:weekly --add_to_config --config_path /models/config.json --model_path OpenVINO/InternVL2-2B-int4-ov --model_name OpenVINO/InternVL2-2B-int4-ov +docker run --rm -u $(id -u):$(id -g) -v $PWD/models:/models openvino/model_server:weekly --pull --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --model_repository_path /models --model_name ovms-model-vl --task text_generation --pipeline_type VLM_CB +docker run --rm -u $(id -u):$(id -g) -v $PWD/models:/models openvino/model_server:weekly --add_to_config --config_path /models/config.json --model_path /models/Junrui2021/Qwen3-VL-8B-Instruct-int4 --model_name ovms-model-vl ``` ::: :::: @@ -300,12 +300,12 @@ docker run --rm -u $(id -u):$(id -g) -v 
$PWD/models:/models openvino/model_serve Keep the model server running or restart it. Here is the basic call to check if it works: ```console -curl http://localhost:8000/v3/chat/completions -H "Content-Type: application/json" -d "{ \"model\": \"OpenVINO/InternVL2-2B-int4-ov\", \"messages\":[{\"role\": \"user\", \"content\": [{\"type\": \"text\", \"text\": \"what is in the picture?\"},{\"type\": \"image_url\", \"image_url\": {\"url\": \"http://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/3/demos/common/static/images/zebra.jpeg\"}}]}], \"max_completion_tokens\": 100}" +curl http://localhost:8000/v3/chat/completions -H "Content-Type: application/json" -d "{ \"model\": \"ovms-model-vl\", \"messages\":[{\"role\": \"user\", \"content\": [{\"type\": \"text\", \"text\": \"what is in the picture?\"},{\"type\": \"image_url\", \"image_url\": {\"url\": \"http://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/3/demos/common/static/images/zebra.jpeg\"}}]}], \"max_completion_tokens\": 100}" ``` ### Step 2: Chat with VLM -1. Start a **New Chat** with model set to `OpenVINO/InternVL2-2B-int4-ov` +1. Start a **New Chat** and choose `ovms-model-vl` model 2. Click **+More** to upload images, by capturing the screen or uploading files. The image used in this demo is [http://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/3/demos/common/static/images/zebra.jpeg](http://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2025/3/demos/common/static/images/zebra.jpeg). ![upload images](./upload_images.png) @@ -332,8 +332,8 @@ mcpo --port 9000 -- python -m mcp_weather_server ### Step 2: Tools Setting -1. Go to **Admin Panel** → **Settings** → **External Tools** -2. Click **+Add Connection** +1. Go to **Admin Panel** → **Settings** → **Integrations** +2. Click **+Manage Tool Servers** * URL: `http://localhost:9000` * Name the tool 3. 
Click **Save** @@ -342,7 +342,7 @@ mcpo --port 9000 -- python -m mcp_weather_server ### Step 3: Chat with AI Agent -1. Click **+More** and toggle on the tool +1. Click **Integrations** → **Tools** and toggle on the tool ![activate the tool](./activate_the_tool.png) @@ -354,6 +354,100 @@ mcpo --port 9000 -- python -m mcp_weather_server [https://docs.openwebui.com/features/plugin/tools/openapi-servers/open-webui](https://docs.openwebui.com/features/plugin/tools/openapi-servers/open-webui#step-2-connect-tool-server-in-open-webui) +## Using Web Search + +### Step 1: Configure Web Search + +1. Go to **Admin Panel** → **Settings** → **Web Search** +2. Enable **Web Search** +3. Choose **Web Search Engine** +4. Add **API Key** +5. Click **Save** + +![web search configuration](./web_search_config.png) + +### Step 2: Enable Web Search in the model + +1. Go to **Admin Panel** → **Settings** → **Models** +2. Choose desired model +3. Enable **Web Search** capability +4. In **Default Features** enable **Web Search** or toggle it in the chat +5. In **Advanced Parameters** set **Function Calling** to **Native** + +![function calling native](./function_calling_native_set.png) + +![web search model configuration](./web_search_model_config.png) + + +### Step 3: Use Web Search in the chat + +1. Open a new Chat +2. Enable **Web Search**, if it's not already displayed as a blue icon below. +3. Send the prompt + +![web search usage](./web_search_usage.png) + +### Reference +[https://docs.openwebui.com/features/chat-conversations/web-search/agentic-search/](https://docs.openwebui.com/features/chat-conversations/web-search/agentic-search/) + + +## Adding Context to the prompt + +In Open WebUI, users can add additional context to their chats using the **Memory** feature. This allows models to access shared information across all conversations. + +To configure it: + +1. Go to **Settings** → **Personalization** +2. Enable **Memory** +3. Click **Manage** +4. Click **Add Memory** +5. 
Enter the information + +![add memory](./add_memory.png) + +It's possible to have multiple manageable memory records. + +![multiple memory records](./multiple_memory_records.png) + +Then a workspace model should be created: + +1. Go to **Workspace** → **Models** +2. Choose a model or create one. +3. In **Buildin Tools** section enable **Memory** +4. In **Advanced Parameters** set **Function Calling** to **Native** + +![function calling native](./function_calling_native_set_workspace.png) + +![model memory config](./model_memory_configuration.png) + +It's now available in all chats: + +![memory usage](./memory_usage.png) + +> **Note**: There is no way to make the model search memory by default at the beginning of a conversation in Open WebUI. The user should explicitly tell the model to use its memory. + +### Reference +[https://docs.openwebui.com/features/chat-conversations/memory/](https://docs.openwebui.com/features/chat-conversations/memory/) + +## Code Interpreting + +The **Code Interpreter** feature is available in Open WebUI. + +1. Go to **Admin Panel** → **Settings** → **Models** +2. Choose desired model +3. Enable **Code Interpreter** capability +4. In **Default Features** enable **Code Interpreter** or toggle it in the chat +5. In **Advanced Parameters** set **Function Calling** to **Native** + +![function calling native](./function_calling_native_set.png) + +6. Go to **Admin Panel** → **Settings** → **Code Execution** +7. Enable **Code Interpreter** and **Code Execution** + +Then it's ready to use. In a new chat it's possible to toggle **Code Interpreter** and write a prompt. + +![code execution](./code_execution.png) + ## Audio > **Note:** To ensure audio features work correctly, download [FFmpeg](https://ffmpeg.org/download.html) and add its executable directory to your system's `PATH` environment variable. 
diff --git a/demos/integration_with_OpenWebUI/activate_the_tool.png b/demos/integration_with_OpenWebUI/activate_the_tool.png index a9f0ddaf3a..ffcf0b61b8 100644 Binary files a/demos/integration_with_OpenWebUI/activate_the_tool.png and b/demos/integration_with_OpenWebUI/activate_the_tool.png differ diff --git a/demos/integration_with_OpenWebUI/add_memory.png b/demos/integration_with_OpenWebUI/add_memory.png new file mode 100644 index 0000000000..f178a55cfb Binary files /dev/null and b/demos/integration_with_OpenWebUI/add_memory.png differ diff --git a/demos/integration_with_OpenWebUI/chat_demo.png b/demos/integration_with_OpenWebUI/chat_demo.png index 5a2278d839..0d41c7595a 100644 Binary files a/demos/integration_with_OpenWebUI/chat_demo.png and b/demos/integration_with_OpenWebUI/chat_demo.png differ diff --git a/demos/integration_with_OpenWebUI/chat_with_AI_Agent_demo.png b/demos/integration_with_OpenWebUI/chat_with_AI_Agent_demo.png index ca8215be7c..c00c8663bc 100644 Binary files a/demos/integration_with_OpenWebUI/chat_with_AI_Agent_demo.png and b/demos/integration_with_OpenWebUI/chat_with_AI_Agent_demo.png differ diff --git a/demos/integration_with_OpenWebUI/chat_with_RAG_demo.png b/demos/integration_with_OpenWebUI/chat_with_RAG_demo.png index 302b866f6e..5a2ac3e827 100644 Binary files a/demos/integration_with_OpenWebUI/chat_with_RAG_demo.png and b/demos/integration_with_OpenWebUI/chat_with_RAG_demo.png differ diff --git a/demos/integration_with_OpenWebUI/chat_with_VLM_demo.png b/demos/integration_with_OpenWebUI/chat_with_VLM_demo.png index 267f716d6c..3f4be189d5 100644 Binary files a/demos/integration_with_OpenWebUI/chat_with_VLM_demo.png and b/demos/integration_with_OpenWebUI/chat_with_VLM_demo.png differ diff --git a/demos/integration_with_OpenWebUI/code_execution.png b/demos/integration_with_OpenWebUI/code_execution.png new file mode 100644 index 0000000000..d31c041605 Binary files /dev/null and b/demos/integration_with_OpenWebUI/code_execution.png differ 
diff --git a/demos/integration_with_OpenWebUI/connection_setting.png b/demos/integration_with_OpenWebUI/connection_setting.png index 11715abeb3..b670809ed7 100644 Binary files a/demos/integration_with_OpenWebUI/connection_setting.png and b/demos/integration_with_OpenWebUI/connection_setting.png differ diff --git a/demos/integration_with_OpenWebUI/create_and_configure_the_RAG-enabled_model.png b/demos/integration_with_OpenWebUI/create_and_configure_the_RAG-enabled_model.png index f1efd6ce6e..5032beabaf 100644 Binary files a/demos/integration_with_OpenWebUI/create_and_configure_the_RAG-enabled_model.png and b/demos/integration_with_OpenWebUI/create_and_configure_the_RAG-enabled_model.png differ diff --git a/demos/integration_with_OpenWebUI/function_calling_native_set.png b/demos/integration_with_OpenWebUI/function_calling_native_set.png new file mode 100644 index 0000000000..d08c8232a7 Binary files /dev/null and b/demos/integration_with_OpenWebUI/function_calling_native_set.png differ diff --git a/demos/integration_with_OpenWebUI/function_calling_native_set_workspace.png b/demos/integration_with_OpenWebUI/function_calling_native_set_workspace.png new file mode 100644 index 0000000000..be5bd3a909 Binary files /dev/null and b/demos/integration_with_OpenWebUI/function_calling_native_set_workspace.png differ diff --git a/demos/integration_with_OpenWebUI/memory_usage.png b/demos/integration_with_OpenWebUI/memory_usage.png new file mode 100644 index 0000000000..eb202fd412 Binary files /dev/null and b/demos/integration_with_OpenWebUI/memory_usage.png differ diff --git a/demos/integration_with_OpenWebUI/model_memory_configuration.png b/demos/integration_with_OpenWebUI/model_memory_configuration.png new file mode 100644 index 0000000000..bcc13f9769 Binary files /dev/null and b/demos/integration_with_OpenWebUI/model_memory_configuration.png differ diff --git a/demos/integration_with_OpenWebUI/multiple_memory_records.png 
b/demos/integration_with_OpenWebUI/multiple_memory_records.png new file mode 100644 index 0000000000..d2bc00cb92 Binary files /dev/null and b/demos/integration_with_OpenWebUI/multiple_memory_records.png differ diff --git a/demos/integration_with_OpenWebUI/select_documents.png b/demos/integration_with_OpenWebUI/select_documents.png index 060ff1a0a1..a45bb0ffc0 100644 Binary files a/demos/integration_with_OpenWebUI/select_documents.png and b/demos/integration_with_OpenWebUI/select_documents.png differ diff --git a/demos/integration_with_OpenWebUI/tools_setting.png b/demos/integration_with_OpenWebUI/tools_setting.png index 3ca1853951..2917de2f55 100644 Binary files a/demos/integration_with_OpenWebUI/tools_setting.png and b/demos/integration_with_OpenWebUI/tools_setting.png differ diff --git a/demos/integration_with_OpenWebUI/upload_images.png b/demos/integration_with_OpenWebUI/upload_images.png index 2b55c16add..29dcc71791 100644 Binary files a/demos/integration_with_OpenWebUI/upload_images.png and b/demos/integration_with_OpenWebUI/upload_images.png differ diff --git a/demos/integration_with_OpenWebUI/voice_mode.png b/demos/integration_with_OpenWebUI/voice_mode.png index 878e52ab8c..8ce724df0e 100644 Binary files a/demos/integration_with_OpenWebUI/voice_mode.png and b/demos/integration_with_OpenWebUI/voice_mode.png differ diff --git a/demos/integration_with_OpenWebUI/web_search_config.png b/demos/integration_with_OpenWebUI/web_search_config.png new file mode 100644 index 0000000000..47010cdda0 Binary files /dev/null and b/demos/integration_with_OpenWebUI/web_search_config.png differ diff --git a/demos/integration_with_OpenWebUI/web_search_model_config.png b/demos/integration_with_OpenWebUI/web_search_model_config.png new file mode 100644 index 0000000000..6037c6991f Binary files /dev/null and b/demos/integration_with_OpenWebUI/web_search_model_config.png differ diff --git a/demos/integration_with_OpenWebUI/web_search_usage.png 
b/demos/integration_with_OpenWebUI/web_search_usage.png new file mode 100644 index 0000000000..fcd4cfb1ab Binary files /dev/null and b/demos/integration_with_OpenWebUI/web_search_usage.png differ diff --git a/spelling-whitelist.txt b/spelling-whitelist.txt index 1c7ba3e8fe..bb3514e0fb 100644 --- a/spelling-whitelist.txt +++ b/spelling-whitelist.txt @@ -26,4 +26,5 @@ release_files/thirdparty-licenses/libgt2.LICENSE.txt:1065: rouines ==> routines release_files/thirdparty-licenses/libgt2.LICENSE.txt:1083: publically ==> publicly src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp demos/vlm_npu/README.md:157: mane ==> main, many, maine -demos/vlm_npu/README.md:218: mane ==> main, many, maine \ No newline at end of file +demos/vlm_npu/README.md:218: mane ==> main, many, maine +demos/integration_with_OpenWebUI/README.md:416: Buildin ==> Building, Build in \ No newline at end of file