diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
index 6d4cc501c..40a682b2d 100644
--- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
+++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
@@ -1819,6 +1819,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"
+-- Estimated amount of tokens:
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"
+
-- Start new chat in workspace '{0}'
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace '{0}'"
@@ -3553,6 +3556,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(O
-- Add
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
+-- Selected file path for the custom tokenizer
+UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T278585345"] = "Selected file path for the custom tokenizer"
+
-- No models loaded or available.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No models loaded or available."
@@ -3562,6 +3568,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "In
-- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually."
+-- Choose a custom tokenizer here
+UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T3787466119"] = "Choose a custom tokenizer here"
+
-- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection"
@@ -5398,6 +5407,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."
+-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."
+
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."
@@ -5638,6 +5650,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"
+-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."
+
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."
@@ -6664,29 +6679,80 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304
-- AI source selection with AI retrieval context validation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI source selection with AI retrieval context validation"
--- Executable Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Executable Files"
+-- Text
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1041509726"] = "Text"
+
+-- Office Files
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1063218378"] = "Office Files"
+
+-- Executable
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1364437037"] = "Executable"
+
+-- Mail
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1399880782"] = "Mail"
+
+-- Source like
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1487238587"] = "Source like"
+
+-- Image
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1494001562"] = "Image"
+
+-- Video
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1533528076"] = "Video"
+
+-- Source Code
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1569048941"] = "Source Code"
+
+-- Config
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1779622119"] = "Config"
+
+-- Audio
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T2291602489"] = "Audio"
+
+-- Custom
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T2502277006"] = "Custom"
+
+-- Media
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T3507473059"] = "Media"
+
+-- Source like prefix
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T378481461"] = "Source like prefix"
+
+-- Document
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T4165204724"] = "Document"
+
+-- Text
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1041509726"] = "Text"
+
+-- Office Files
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1063218378"] = "Office Files"
+
+-- Executable
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1364437037"] = "Executable"
+
+-- Image
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1494001562"] = "Image"
--- All Source Code Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2460199369"] = "All Source Code Files"
+-- Video
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1533528076"] = "Video"
--- All Audio Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2575722901"] = "All Audio Files"
+-- Source Code
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1569048941"] = "Source Code"
--- All Video Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2850789856"] = "All Video Files"
+-- Config
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1779622119"] = "Config"
--- PDF Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files"
+-- Audio
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T2291602489"] = "Audio"
--- All Image Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T4086723714"] = "All Image Files"
+-- Custom
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T2502277006"] = "Custom"
--- Text Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T639143005"] = "Text Files"
+-- Media
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T3507473059"] = "Media"
--- All Office Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T709668067"] = "All Office Files"
+-- Document
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T4165204724"] = "Document"
-- Pandoc Installation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::PANDOCAVAILABILITYSERVICE::T185447014"] = "Pandoc Installation"
diff --git a/app/MindWork AI Studio/Chat/FileAttachment.cs b/app/MindWork AI Studio/Chat/FileAttachment.cs
index f364ed8fa..1ce4f1d98 100644
--- a/app/MindWork AI Studio/Chat/FileAttachment.cs
+++ b/app/MindWork AI Studio/Chat/FileAttachment.cs
@@ -58,11 +58,14 @@ public record FileAttachment(FileAttachmentType Type, string FileName, string Fi
/// extracting the filename, and reading the file size.
///
/// The full path to the file.
+ /// Optional: The allowed file types.
/// A FileAttachment instance with populated properties.
- public static FileAttachment FromPath(string filePath)
+ public static FileAttachment FromPath(string filePath, FileType[]? allowedTypes=null)
{
var fileName = Path.GetFileName(filePath);
var fileSize = File.Exists(filePath) ? new FileInfo(filePath).Length : 0;
+ if (allowedTypes != null && !IsAllowed(filePath, allowedTypes))
+ return new FileAttachment(FileAttachmentType.FORBIDDEN, fileName, filePath, fileSize);
var type = DetermineFileType(filePath);
return type switch
@@ -76,34 +79,24 @@ public static FileAttachment FromPath(string filePath)
///
/// Determines the file attachment type based on the file extension.
- /// Uses centrally defined file type filters from .
+ /// Uses centrally defined file type filters from <see cref="FileTypes"/>.
///
/// The file path to analyze.
/// The corresponding FileAttachmentType.
private static FileAttachmentType DetermineFileType(string filePath)
{
- var extension = Path.GetExtension(filePath).TrimStart('.').ToLowerInvariant();
-
- if (FileTypeFilter.Executables.FilterExtensions.Contains(extension))
+ if (FileTypes.IsAllowedPath(filePath, FileTypes.EXECUTABLES)) // A match against the executable types is always rejected.
return FileAttachmentType.FORBIDDEN;
- // Check if it's an image file:
- if (FileTypeFilter.AllImages.FilterExtensions.Contains(extension))
+ if (FileTypes.IsAllowedPath(filePath, FileTypes.IMAGE))
return FileAttachmentType.IMAGE;
+
- // Check if it's an audio file:
- if (FileTypeFilter.AllAudio.FilterExtensions.Contains(extension))
+ if (FileTypes.IsAllowedPath(filePath, FileTypes.AUDIO))
return FileAttachmentType.AUDIO;
- // Check if it's an allowed document file (PDF, Text, or Office):
- if (FileTypeFilter.PDF.FilterExtensions.Contains(extension) ||
- FileTypeFilter.Text.FilterExtensions.Contains(extension) ||
- FileTypeFilter.AllOffice.FilterExtensions.Contains(extension) ||
- FileTypeFilter.AllSourceCode.FilterExtensions.Contains(extension) ||
- FileTypeFilter.IsAllowedSourceLikeFileName(filePath))
- return FileAttachmentType.DOCUMENT;
-
- // All other file types are forbidden:
- return FileAttachmentType.FORBIDDEN;
+ return FileTypes.IsAllowedPath(filePath, FileTypes.DOCUMENT)
+ ? FileAttachmentType.DOCUMENT
+ : FileAttachmentType.FORBIDDEN;
}
-}
\ No newline at end of file
+}
diff --git a/app/MindWork AI Studio/Components/AttachDocuments.razor.cs b/app/MindWork AI Studio/Components/AttachDocuments.razor.cs
index acfc0dd2f..0608125a7 100644
--- a/app/MindWork AI Studio/Components/AttachDocuments.razor.cs
+++ b/app/MindWork AI Studio/Components/AttachDocuments.razor.cs
@@ -48,6 +48,9 @@ public partial class AttachDocuments : MSGComponentBase
[Parameter]
public bool UseSmallForm { get; set; }
+ [Parameter]
+ public FileType[]? AllowedFileTypes { get; set; }
+
///
/// When true, validate media file types before attaching. Default is true. That means that
/// the user cannot attach unsupported media file types when the provider or model does not
@@ -181,7 +184,6 @@ protected override async Task OnInitializedAsync()
{
if(!await FileExtensionValidation.IsExtensionValidWithNotifyAsync(FileExtensionValidation.UseCase.ATTACHING_CONTENT, path, this.ValidateMediaFileTypes, this.Provider))
continue;
-
- this.DocumentPaths.Add(FileAttachment.FromPath(path));
+ this.DocumentPaths.Add(FileAttachment.FromPath(path, this.AllowedFileTypes)); // Apply the same type restriction as AddFilesManually.
}
@@ -226,7 +228,7 @@ private async Task AddFilesManually()
if (!await FileExtensionValidation.IsExtensionValidWithNotifyAsync(FileExtensionValidation.UseCase.ATTACHING_CONTENT, selectedFilePath, this.ValidateMediaFileTypes, this.Provider))
continue;
- this.DocumentPaths.Add(FileAttachment.FromPath(selectedFilePath));
+ this.DocumentPaths.Add(FileAttachment.FromPath(selectedFilePath, this.AllowedFileTypes));
}
await this.DocumentPathsChanged.InvokeAsync(this.DocumentPaths);
diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor b/app/MindWork AI Studio/Components/ChatComponent.razor
index 20bb5ec47..ced007115 100644
--- a/app/MindWork AI Studio/Components/ChatComponent.razor
+++ b/app/MindWork AI Studio/Components/ChatComponent.razor
@@ -34,7 +34,7 @@
-
diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor.cs b/app/MindWork AI Studio/Components/ChatComponent.razor.cs
index f734d620f..9bfa27bda 100644
--- a/app/MindWork AI Studio/Components/ChatComponent.razor.cs
+++ b/app/MindWork AI Studio/Components/ChatComponent.razor.cs
@@ -3,6 +3,7 @@
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
+using AIStudio.Tools.Services;
using Microsoft.AspNetCore.Components;
using Microsoft.AspNetCore.Components.Web;
@@ -44,6 +45,8 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
[Inject]
private IDialogService DialogService { get; init; } = null!;
+ [Inject]
+ private RustService RustService { get; init; } = null!;
[Inject]
private IJSRuntime JsRuntime { get; init; } = null!;
@@ -69,10 +72,12 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
private Guid currentChatThreadId = Guid.Empty;
private CancellationTokenSource? cancellationTokenSource;
private HashSet chatDocumentPaths = [];
+ private string tokenCount = "0";
+ private string TokenCountMessage => $"{this.T("Estimated amount of tokens:")} {this.tokenCount}";
// Unfortunately, we need the input field reference to blur the focus away. Without
// this, we cannot clear the input field.
- private MudTextField inputField = null!;
+ private UserPromptComponent inputField = null!;
#region Overrides of ComponentBase
@@ -460,6 +465,9 @@ private async Task InputKeyEvent(KeyboardEventArgs keyEvent)
// Was a modifier key pressed as well?
var isModifier = keyEvent.AltKey || keyEvent.CtrlKey || keyEvent.MetaKey || keyEvent.ShiftKey;
+ if (isEnter)
+ await this.CalculateTokenCount();
+
// Depending on the user's settings, might react to shortcuts:
switch (this.SettingsManager.ConfigurationData.Chat.ShortcutSendBehavior)
{
@@ -591,6 +599,7 @@ private async Task SendMessage(bool reuseLastUserPrompt = false)
this.chatDocumentPaths.Clear();
await this.inputField.BlurAsync();
+ this.tokenCount = "0";
// Enable the stream state for the chat component:
this.isStreaming = true;
@@ -973,6 +982,20 @@ private Task EditLastBlock(IContent block)
return Task.CompletedTask;
}
+ private async Task CalculateTokenCount() // Refreshes tokenCount from the current input field value via RustService.GetTokenCount.
+ {
+ if (this.inputField.Value is null)
+ {
+ this.tokenCount = "0"; // No input value: reset to zero without calling the service.
+ return;
+ }
+ var response = await this.RustService.GetTokenCount(this.inputField.Value);
+ if (response is null)
+ return; // No response: the previously stored count is kept as-is.
+ this.tokenCount = response.TokenCount.ToString();
+ this.StateHasChanged(); // Ask for a re-render so TokenCountMessage reflects the new value.
+ }
+
#region Overrides of MSGComponentBase
protected override async Task ProcessIncomingMessage(ComponentBase? sendingComponent, Event triggeredEvent, T? data) where T : default
diff --git a/app/MindWork AI Studio/Components/SelectFile.razor b/app/MindWork AI Studio/Components/SelectFile.razor
index de3971e52..561b11c0d 100644
--- a/app/MindWork AI Studio/Components/SelectFile.razor
+++ b/app/MindWork AI Studio/Components/SelectFile.razor
@@ -11,6 +11,7 @@
AdornmentIcon="@Icons.Material.Filled.AttachFile"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
Variant="Variant.Outlined"
+ Clearable="this.IsClearable"
/>
diff --git a/app/MindWork AI Studio/Components/SelectFile.razor.cs b/app/MindWork AI Studio/Components/SelectFile.razor.cs
index 9caf3cd77..06826ca29 100644
--- a/app/MindWork AI Studio/Components/SelectFile.razor.cs
+++ b/app/MindWork AI Studio/Components/SelectFile.razor.cs
@@ -23,16 +23,19 @@ public partial class SelectFile : MSGComponentBase
public string FileDialogTitle { get; set; } = "Select File";
[Parameter]
- public FileTypeFilter? Filter { get; set; }
+ public FileTypeFilter[]? Filter { get; set; }
[Parameter]
public Func Validation { get; set; } = _ => null;
+
+ [Parameter]
+ public bool IsClearable { get; set; } = false;
[Inject]
public RustService RustService { get; set; } = null!;
[Inject]
- protected ILogger Logger { get; init; } = null!;
+ protected ILogger Logger { get; init; } = null!;
private static readonly Dictionary SPELLCHECK_ATTRIBUTES = new();
diff --git a/app/MindWork AI Studio/Components/UserPromptComponent.cs b/app/MindWork AI Studio/Components/UserPromptComponent.cs
new file mode 100644
index 000000000..03139a525
--- /dev/null
+++ b/app/MindWork AI Studio/Components/UserPromptComponent.cs
@@ -0,0 +1,68 @@
+using Microsoft.AspNetCore.Components;
+using Timer = System.Timers.Timer;
+
+namespace AIStudio.Components;
+
+///
+/// Debounced multi-line text input built on <see cref="MudTextField{T}"/>.
+/// Keeps the base API while adding a debounce timer.
+/// Callers can override any property as usual.
+///
+public class UserPromptComponent : MudTextField
+{
+ [Parameter]
+ public TimeSpan DebounceTime { get; set; } = TimeSpan.FromMilliseconds(800); // Interval applied to the debounce timer.
+
+ [Parameter]
+ public Func WhenTextChangedAsync { get; set; } = _ => Task.CompletedTask; // Called with the text when the timer elapses and the text differs from the last notified one.
+
+ private readonly Timer debounceTimer = new(); // NOTE(review): never disposed in this class — confirm the base component's disposal hook and stop/dispose the timer there.
+ private string text = string.Empty; // Latest text synced from the Text parameter.
+ private string lastParameterText = string.Empty; // Value recorded at the most recent parameter sync.
+ private string lastNotifiedText = string.Empty; // Text most recently reported through TextChanged / WhenTextChangedAsync.
+ private bool isInitialized; // Set at the end of the timer setup in OnInitializedAsync.
+
+ protected override async Task OnInitializedAsync()
+ {
+ this.text = this.Text ?? string.Empty;
+ this.lastParameterText = this.text;
+ this.lastNotifiedText = this.text;
+ this.debounceTimer.AutoReset = false; // One-shot: the timer must be started again for each debounce window.
+ this.debounceTimer.Interval = this.DebounceTime.TotalMilliseconds;
+ this.debounceTimer.Elapsed += (_, _) =>
+ {
+ this.debounceTimer.Stop();
+ if (this.text == this.lastNotifiedText) // Nothing new since the last notification.
+ return;
+
+ this.lastNotifiedText = this.text;
+ this.InvokeAsync(async () => await this.TextChanged.InvokeAsync(this.text)); // Dispatched via InvokeAsync; the returned task is not awaited.
+ this.InvokeAsync(async () => await this.WhenTextChangedAsync(this.text)); // Queued separately from the TextChanged callback above.
+ };
+
+ this.isInitialized = true;
+ await base.OnInitializedAsync();
+ }
+
+ protected override async Task OnParametersSetAsync()
+ {
+ // Ensure the timer uses the latest debouncing interval:
+ if (!this.isInitialized) // Returns before base.OnParametersSetAsync() while the timer is not yet set up.
+ return;
+
+ if(Math.Abs(this.debounceTimer.Interval - this.DebounceTime.TotalMilliseconds) > 1)
+ this.debounceTimer.Interval = this.DebounceTime.TotalMilliseconds;
+
+ // Only sync when the parent's parameter actually changed since the last change:
+ if (this.Text != this.lastParameterText)
+ {
+ this.text = this.Text ?? string.Empty;
+ this.lastParameterText = this.text;
+ }
+
+ this.debounceTimer.Stop(); // Every parameter update restarts the debounce window, whether or not the text changed.
+ this.debounceTimer.Start();
+
+ await base.OnParametersSetAsync();
+ }
+}
diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor
index 85e6e6eff..421dae839 100644
--- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor
+++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor
@@ -1,5 +1,6 @@
@using AIStudio.Provider
@using AIStudio.Provider.SelfHosted
+@using AIStudio.Tools.Rust
@inherits MSGComponentBase
@@ -7,7 +8,7 @@
@* ReSharper disable once CSharpWarnings::CS8974 *@
-
+
@foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders)))
{
if (provider.ProvideEmbeddingAPI() || provider is LLMProviders.NONE)
@@ -22,7 +23,7 @@
@T("Create account")
-
+
@if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost))
{
@@ -71,15 +72,14 @@
AdornmentColor="Color.Info"
Validation="@this.ValidateManuallyModel"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
- HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"
- />
+ HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"/>
}
else
{
@T("Load")
- @if(this.availableModels.Count is 0)
+ @if (this.availableModels.Count is 0)
{
@T("No models loaded or available.")
@@ -122,9 +122,13 @@
AdornmentIcon="@Icons.Material.Filled.Lightbulb"
AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingInstanceName"
- UserAttributes="@SPELLCHECK_ATTRIBUTES"
- />
-
+ UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
+
+ @T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.")
+
+ @if (this.DataModel != default){
+
+ }
@@ -133,7 +137,7 @@
@T("Cancel")
- @if(this.IsEditing)
+ @if (this.IsEditing)
{
@T("Update")
}
@@ -143,4 +147,4 @@
}
-
\ No newline at end of file
+
diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs
index 6520b7ee7..b45d687bc 100644
--- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs
+++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs
@@ -1,3 +1,4 @@
+using AIStudio.Chat;
using AIStudio.Components;
using AIStudio.Provider;
using AIStudio.Settings;
@@ -89,6 +90,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private string dataAPIKeyStorageIssue = string.Empty;
private string dataEditingPreviousInstanceName = string.Empty;
private string dataLoadingModelsIssue = string.Empty;
+ private string dataFilePath = string.Empty;
// We get the form reference from Blazor code to validate it manually:
private MudForm form = null!;
@@ -96,7 +98,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private readonly List availableModels = new();
private readonly Encryption encryption = Program.ENCRYPTION;
private readonly ProviderValidation providerValidation;
-
+
public EmbeddingProviderDialog()
{
this.providerValidation = new()
@@ -264,6 +266,13 @@ private async Task OnAPIKeyChanged(string apiKey)
await this.form.Validate();
}
}
+
+ private async Task OnDataFilePathChanged(string filePath)
+ {
+ await this.RustService.ValidateAndStoreTokenizer(this.DataModel.DisplayName, filePath);
+ }
+
+
private void OnHostChanged(Host selectedHost)
{
@@ -307,4 +316,4 @@ private async Task ReloadModels()
};
private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE;
-}
\ No newline at end of file
+}
diff --git a/app/MindWork AI Studio/Pages/Information.razor b/app/MindWork AI Studio/Pages/Information.razor
index b7b9aea41..665afad69 100644
--- a/app/MindWork AI Studio/Pages/Information.razor
+++ b/app/MindWork AI Studio/Pages/Information.razor
@@ -290,6 +290,8 @@
+
+
diff --git a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
index e9571a6c8..70c61528b 100644
--- a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
+++ b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
@@ -1821,6 +1821,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "Der ausge
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Wähle zuerst einen Anbieter aus"
+-- Estimated amount of tokens:
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Geschätzte Anzahl an Tokens:"
+
-- Start new chat in workspace "{0}"
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Neuen Chat im Arbeitsbereich \"{0}\" starten"
@@ -5400,6 +5403,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startprotokollda
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Sehen Sie sich den Quellcode von AI Studio auf GitHub an – wir freuen uns über ihre Beiträge."
+-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "Die Tokenizer‑Bibliothek dient als Basis‑Framework für die Integration des DeepSeek‑Tokenizers."
+
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID-Konflikt: Die Plugin-ID stimmt nicht mit der ID der Unternehmenskonfiguration überein."
@@ -5640,6 +5646,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "Dies ist eine Bib
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Verwendetes .NET SDK"
+-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "Wir verwenden den DeepSeek‑Tokenizer, um die Token‑Anzahl einer Eingabe zu schätzen."
+
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "Diese Bibliothek wird verwendet, um Sidecar-Prozesse zu verwalten und sicherzustellen, dass veraltete oder Zombie-Sidecars erkannt und beendet werden."
@@ -6666,29 +6675,47 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304
-- AI-based data source selection with AI retrieval context validation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "KI-basierte Datenquellen-Auswahl mit Validierung des Abrufkontexts"
--- Executable Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Ausführbare Dateien"
+-- Text
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1041509726"] = "Text"
+
+-- Office Files
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1063218378"] = "Office-Dateien"
+
+-- Executable
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1364437037"] = "Ausführbare Dateien"
+
+-- Mail
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1399880782"] = "E-Mail"
+
+-- Source like
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1487238587"] = "Source Code ähnlich"
+
+-- Image
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1494001562"] = "Bild"
+
+-- Video
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1533528076"] = "Video"
--- All Source Code Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2460199369"] = "Alle Quellcodedateien"
+-- Source Code
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1569048941"] = "Quellcode"
--- All Audio Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2575722901"] = "Alle Audiodateien"
+-- Config
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1779622119"] = "Konfiguration"
--- All Video Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2850789856"] = "Alle Videodateien"
+-- Audio
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T2291602489"] = "Audio"
--- PDF Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF-Dateien"
+-- Custom
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T2502277006"] = "Benutzerdefiniert"
--- All Image Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T4086723714"] = "Alle Bilddateien"
+-- Media
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T3507473059"] = "Medien"
--- Text Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T639143005"] = "Textdateien"
+-- Source like prefix
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T378481461"] = "Source Code ähnlicher Prefix"
--- All Office Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T709668067"] = "Alle Office-Dateien"
+-- Document
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T4165204724"] = "Dokument"
-- Pandoc Installation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::PANDOCAVAILABILITYSERVICE::T185447014"] = "Pandoc-Installation"
diff --git a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
index 71f6c65ad..d792e90d7 100644
--- a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
+++ b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
@@ -1821,6 +1821,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"
+-- Estimated amount of tokens:
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"
+
-- Start new chat in workspace "{0}"
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace \"{0}\""
@@ -5400,6 +5403,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."
+-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."
+
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."
@@ -5640,6 +5646,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"
+-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."
+
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."
@@ -6666,29 +6675,47 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304
-- AI-based data source selection with AI retrieval context validation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI-based data source selection with AI retrieval context validation"
--- Executable Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Executable Files"
+-- Text
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1041509726"] = "Text"
+
+-- Office Files
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1063218378"] = "Office Files"
+
+-- Executable
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1364437037"] = "Executable"
+
+-- Mail
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1399880782"] = "Mail"
+
+-- Source like
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1487238587"] = "Source like"
+
+-- Image
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1494001562"] = "Image"
+
+-- Video
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1533528076"] = "Video"
--- All Source Code Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2460199369"] = "All Source Code Files"
+-- Source Code
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1569048941"] = "Source Code"
--- All Audio Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2575722901"] = "All Audio Files"
+-- Config
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T1779622119"] = "Config"
--- All Video Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2850789856"] = "All Video Files"
+-- Audio
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T2291602489"] = "Audio"
--- PDF Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files"
+-- Custom
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T2502277006"] = "Custom"
--- All Image Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T4086723714"] = "All Image Files"
+-- Media
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T3507473059"] = "Media"
--- Text Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T639143005"] = "Text Files"
+-- Source like prefix
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T378481461"] = "Source like prefix"
--- All Office Files
-UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T709668067"] = "All Office Files"
+-- Document
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T4165204724"] = "Document"
-- Pandoc Installation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::PANDOCAVAILABILITYSERVICE::T185447014"] = "Pandoc Installation"
diff --git a/app/MindWork AI Studio/Tools/PandocExport.cs b/app/MindWork AI Studio/Tools/PandocExport.cs
index 27e5244e5..e57afdd80 100644
--- a/app/MindWork AI Studio/Tools/PandocExport.cs
+++ b/app/MindWork AI Studio/Tools/PandocExport.cs
@@ -2,6 +2,7 @@
using AIStudio.Chat;
using AIStudio.Dialogs;
using AIStudio.Tools.PluginSystem;
+using AIStudio.Tools.Rust;
using AIStudio.Tools.Services;
using DialogOptions = AIStudio.Dialogs.DialogOptions;
@@ -16,7 +17,7 @@ public static class PandocExport
public static async Task ToMicrosoftWord(RustService rustService, IDialogService dialogService, string dialogTitle, IContent markdownContent)
{
- var response = await rustService.SaveFile(dialogTitle, new("Microsoft Word", ["docx"]));
+ var response = await rustService.SaveFile(dialogTitle, [FileTypes.MS_WORD]);
if (response.UserCancelled)
{
LOGGER.LogInformation("User cancelled the save dialog.");
diff --git a/app/MindWork AI Studio/Tools/Rust/FileType.cs b/app/MindWork AI Studio/Tools/Rust/FileType.cs
new file mode 100644
index 000000000..c333a6913
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/Rust/FileType.cs
@@ -0,0 +1,41 @@
+namespace AIStudio.Tools.Rust;
+
+/// <summary>
+/// Represents a file type that can optionally contain child file types.
+/// Use the static helpers <see cref="Leaf"/>, <see cref="Parent"/> and <see cref="Composite"/> to build readable trees.
+/// </summary>
+/// <param name="FilterName">Display name of the type (e.g., "Document").</param>
+/// <param name="FilterExtensions">File extensions belonging to this type (without dot).</param>
+/// <param name="Children">Nested file types that are included when this type is selected.</param>
+public sealed record FileType(string FilterName, string[] FilterExtensions, IReadOnlyList<FileType> Children)
+{
+    /// <summary>
+    /// Factory for a leaf node.
+    /// Example: FileType.Leaf(".NET", "cs", "razor")
+    /// </summary>
+    public static FileType Leaf(string name, params string[] extensions) =>
+        new(name, extensions, []);
+
+    /// <summary>
+    /// Factory for a parent node that only has children.
+    /// Example: FileType.Parent("Source Code", dotnet, java)
+    /// </summary>
+    public static FileType Parent(string name, params FileType[]? children) =>
+        new(name, [], children ?? []);
+
+    /// <summary>
+    /// Factory for a composite node that has its own extensions in addition to children.
+    /// </summary>
+    public static FileType Composite(string name, string[] extensions, params FileType[] children) =>
+        new(name, extensions, children);
+
+    /// <summary>
+    /// Collects all extensions for this type, including children, without case-insensitive duplicates.
+    /// </summary>
+    public IEnumerable<string> FlattenExtensions()
+    {
+        return this.FilterExtensions
+            .Concat(this.Children.SelectMany(child => child.FlattenExtensions()))
+            .Distinct(StringComparer.OrdinalIgnoreCase);
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Rust/FileTypeFilter.cs b/app/MindWork AI Studio/Tools/Rust/FileTypeFilter.cs
index d93f44e01..f4cd1c7e5 100644
--- a/app/MindWork AI Studio/Tools/Rust/FileTypeFilter.cs
+++ b/app/MindWork AI Studio/Tools/Rust/FileTypeFilter.cs
@@ -1,125 +1,49 @@
-// ReSharper disable NotAccessedPositionalProperty.Global
-
-using AIStudio.Tools.PluginSystem;
-
namespace AIStudio.Tools.Rust;
///
-/// Represents a file type filter for file selection dialogs.
+/// Represents a file type that can optionally contain child file types.
+/// Use the static helpers , and to build readable trees.
///
-/// The name of the filter.
-/// The file extensions associated with the filter.
-public readonly record struct FileTypeFilter(string FilterName, string[] FilterExtensions)
+/// Display name of the type (e.g., "Document").
+/// File extensions belonging to this type (without dot).
+/// Nested file types that are included when this type is selected.
+public sealed record FileTypeFilter(string FilterName, string[] FilterExtensions, IReadOnlyList Children)
{
- private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(FileTypeFilter).Namespace, nameof(FileTypeFilter));
-
- private static string[] AllowedSourceLikeFileNames =>
- [
- "Dockerfile",
- "Containerfile",
- "Jenkinsfile",
- "Makefile",
- "GNUmakefile",
- "Procfile",
- "Vagrantfile",
- "Tiltfile",
- "Justfile",
- "Brewfile",
- "Caddyfile",
- "Gemfile",
- "Podfile",
- "Fastfile",
- "Appfile",
- "Rakefile",
- "Dangerfile",
- "BUILD",
- "WORKSPACE",
- "BUCK",
- ];
-
- private static string[] AllowedSourceLikeFileNamePrefixes =>
- [
- "Dockerfile",
- "Containerfile",
- "Jenkinsfile",
- "Procfile",
- "Caddyfile",
- ];
-
- public static bool IsAllowedSourceLikeFileName(string filePath)
+ ///
+ /// Factory for a leaf node.
+ /// Example: FileType.Leaf(".NET", "cs", "razor")
+ ///
+ public static FileTypeFilter Leaf(string name, params string[] extensions) =>
+ new(name, extensions, []);
+
+ ///
+ /// Factory for a parent node that only has children.
+ /// Example: FileType.Parent("Source Code", dotnet, java)
+ ///
+ public static FileTypeFilter Parent(string name, params FileTypeFilter[]? children) =>
+ new(name, [], children ?? []);
+
+ ///
+ /// Factory for a composite node that has its own extensions in addition to children.
+ ///
+ public static FileTypeFilter Composite(string name, string[] extensions, params FileTypeFilter[] children) =>
+ new(name, extensions, children);
+
+ ///
+ /// Collects all extensions for this type, including children.
+ ///
+ public IEnumerable FlattenExtensions()
{
- var fileName = Path.GetFileName(filePath);
- if (string.IsNullOrWhiteSpace(fileName))
- return false;
-
- if (AllowedSourceLikeFileNames.Any(name => string.Equals(name, fileName, StringComparison.OrdinalIgnoreCase)))
+ return this.FilterExtensions
+ .Concat(this.Children.SelectMany(child => child.FlattenExtensions()))
+ .Distinct(StringComparer.OrdinalIgnoreCase);
+ }
+
+ public bool ContainsType(FileTypeFilter target)
+ {
+ if (this == target)
return true;
- return AllowedSourceLikeFileNamePrefixes.Any(prefix => fileName.StartsWith(prefix, StringComparison.OrdinalIgnoreCase));
+ return this.Children.Any(child => child.ContainsType(target));
}
-
- public static FileTypeFilter PDF => new(TB("PDF Files"), ["pdf"]);
-
- public static FileTypeFilter Text => new(TB("Text Files"), ["txt", "md"]);
-
- public static FileTypeFilter AllOffice => new(TB("All Office Files"), ["docx", "xlsx", "pptx", "doc", "xls", "ppt", "pdf"]);
-
- public static FileTypeFilter AllImages => new(TB("All Image Files"), ["jpg", "jpeg", "png", "gif", "bmp", "tiff", "svg", "webp", "heic"]);
-
- public static FileTypeFilter AllVideos => new(TB("All Video Files"), ["mp4", "m4v", "avi", "mkv", "mov", "wmv", "flv", "webm"]);
-
- public static FileTypeFilter AllAudio => new(TB("All Audio Files"), ["mp3", "wav", "wave", "aac", "flac", "ogg", "m4a", "wma", "alac", "aiff", "m4b"]);
-
- public static FileTypeFilter AllSourceCode => new(TB("All Source Code Files"),
- [
- // .NET
- "cs", "vb", "fs", "razor", "aspx", "cshtml", "csproj",
-
- // Java:
- "java",
-
- // Python:
- "py",
-
- // JavaScript/TypeScript:
- "js", "ts",
-
- // C/C++:
- "c", "cpp", "h", "hpp",
-
- // Ruby:
- "rb",
-
- // Go:
- "go",
-
- // Rust:
- "rs",
-
- // Lua:
- "lua",
-
- // PHP:
- "php",
-
- // HTML/CSS:
- "html", "css",
-
- // Swift/Kotlin:
- "swift", "kt",
-
- // Shell scripts:
- "sh", "bash",
-
- // Logging files:
- "log",
-
- // JSON/YAML/XML:
- "json", "yaml", "yml", "xml",
-
- // Config files:
- "ini", "cfg", "toml", "plist",
- ]);
-
- public static FileTypeFilter Executables => new(TB("Executable Files"), ["exe", "app", "bin", "appimage"]);
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Rust/FileTypes.cs b/app/MindWork AI Studio/Tools/Rust/FileTypes.cs
new file mode 100644
index 000000000..789eb7d6b
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/Rust/FileTypes.cs
@@ -0,0 +1,130 @@
+using AIStudio.Tools.PluginSystem;
+
+namespace AIStudio.Tools.Rust;
+
+///
+/// Central definition of supported file types with parent/child relationships and helpers
+/// to build extension whitelists (e.g., for file pickers or validation).
+///
+public static class FileTypes
+{
+    // The language files register these texts under this class (FileTypes), so the lookup must name it as well.
+    private static string TB(string fallbackEn) => I18N.I.T(fallbackEn, typeof(FileTypes).Namespace, nameof(FileTypes));
+    // Keep the SOURCE_LIKE entries in the same leaf style as the other file types.
+    // Their values are file names, not extensions, so extension filters cannot match
+    // Dockerfile-style files; IsAllowedPath performs the real matching by file name.
+ public static readonly FileTypeFilter SOURCE_LIKE_FILE_NAMES = FileTypeFilter.Leaf(TB("Source like"),
+ "Dockerfile", "Containerfile", "Jenkinsfile", "Makefile", "GNUmakefile", "Procfile", "Vagrantfile",
+ "Tiltfile", "Justfile", "Brewfile", "Caddyfile", "Gemfile", "Podfile", "Fastfile", "Appfile", "Rakefile", "Dangerfile",
+ "BUILD", "WORKSPACE", "BUCK");
+
+ public static readonly FileTypeFilter SOURCE_LIKE_FILE_NAME_PREFIXES = FileTypeFilter.Leaf(TB("Source like prefix"),
+ "Dockerfile", "Containerfile", "Jenkinsfile", "Procfile", "Caddyfile");
+
+ // Source code hierarchy: SourceCode -> (.NET, Java, Python, Web, C/C++, Config, ...)
+ public static readonly FileTypeFilter DOTNET = FileTypeFilter.Leaf(".NET", "cs", "razor", "vb", "fs", "aspx", "cshtml", "csproj");
+ public static readonly FileTypeFilter JAVA = FileTypeFilter.Leaf("Java", "java");
+ public static readonly FileTypeFilter PYTHON = FileTypeFilter.Leaf("Python", "py");
+ public static readonly FileTypeFilter JAVASCRIPT = FileTypeFilter.Leaf("JavaScript/TypeScript", "js", "ts");
+ public static readonly FileTypeFilter CFAMILY = FileTypeFilter.Leaf("C/C++", "c", "cpp", "h", "hpp");
+ public static readonly FileTypeFilter RUBY = FileTypeFilter.Leaf("Ruby", "rb");
+ public static readonly FileTypeFilter GO = FileTypeFilter.Leaf("Go", "go");
+ public static readonly FileTypeFilter RUST = FileTypeFilter.Leaf("Rust", "rs");
+ public static readonly FileTypeFilter LUA = FileTypeFilter.Leaf("Lua", "lua");
+ public static readonly FileTypeFilter PHP = FileTypeFilter.Leaf("PHP", "php");
+ public static readonly FileTypeFilter WEB = FileTypeFilter.Leaf("HTML/CSS", "html", "css");
+ public static readonly FileTypeFilter APP = FileTypeFilter.Leaf("Swift/Kotlin", "swift", "kt");
+ public static readonly FileTypeFilter SHELL = FileTypeFilter.Leaf("Shell", "sh", "bash", "zsh");
+ public static readonly FileTypeFilter LOG = FileTypeFilter.Leaf("Log", "log");
+ public static readonly FileTypeFilter JSON = FileTypeFilter.Leaf("JSON", "json");
+ public static readonly FileTypeFilter XML = FileTypeFilter.Leaf("XML", "xml");
+ public static readonly FileTypeFilter YAML = FileTypeFilter.Leaf("YAML", "yaml", "yml");
+ public static readonly FileTypeFilter CONFIG = FileTypeFilter.Leaf(TB("Config"), "ini", "cfg", "toml", "plist");
+
+ public static readonly FileTypeFilter SOURCE_CODE = FileTypeFilter.Parent(TB("Source Code"),
+ DOTNET, JAVA, PYTHON, JAVASCRIPT, CFAMILY, RUBY, GO, RUST, LUA, PHP, WEB, APP, SHELL, LOG, JSON, XML, YAML, CONFIG, SOURCE_LIKE_FILE_NAMES, SOURCE_LIKE_FILE_NAME_PREFIXES);
+
+ // Document hierarchy
+ public static readonly FileTypeFilter PDF = FileTypeFilter.Leaf("PDF", "pdf");
+ public static readonly FileTypeFilter TEXT = FileTypeFilter.Leaf(TB("Text"), "txt", "md", "rtf");
+ public static readonly FileTypeFilter MS_WORD = FileTypeFilter.Leaf("Microsoft Word", "docx", "doc");
+ public static readonly FileTypeFilter WORD = FileTypeFilter.Composite("Word", ["odt"], MS_WORD);
+ public static readonly FileTypeFilter EXCEL = FileTypeFilter.Leaf("Excel", "xls", "xlsx");
+ public static readonly FileTypeFilter POWER_POINT = FileTypeFilter.Leaf("PowerPoint", "ppt", "pptx");
+ public static readonly FileTypeFilter MAIL = FileTypeFilter.Leaf(TB("Mail"), "eml", "msg", "mbox");
+
+ public static readonly FileTypeFilter OFFICE_FILES = FileTypeFilter.Parent(TB("Office Files"),
+ WORD, EXCEL, POWER_POINT, PDF);
+ public static readonly FileTypeFilter DOCUMENT = FileTypeFilter.Parent(TB("Document"),
+ TEXT, OFFICE_FILES, SOURCE_CODE, MAIL);
+
+ // Media hierarchy
+ public static readonly FileTypeFilter IMAGE = FileTypeFilter.Leaf(TB("Image"),
+ "jpg", "jpeg", "png", "gif", "bmp", "tiff", "svg", "webp", "heic");
+ public static readonly FileTypeFilter AUDIO = FileTypeFilter.Leaf(TB("Audio"),
+ "mp3", "wav", "wave", "aac", "flac", "ogg", "m4a", "wma", "alac", "aiff", "m4b");
+ public static readonly FileTypeFilter VIDEO = FileTypeFilter.Leaf(TB("Video"),
+ "mp4", "m4v", "avi", "mkv", "mov", "wmv", "flv", "webm");
+
+ public static readonly FileTypeFilter MEDIA = FileTypeFilter.Parent(TB("Media"), IMAGE, AUDIO, VIDEO);
+
+ // Other standalone types
+ public static readonly FileTypeFilter EXECUTABLES = FileTypeFilter.Leaf(TB("Executable"), "exe", "app", "bin", "appimage");
+
+    /// <summary>Collapses several filters into one: null for no filter, the filter itself for exactly one.</summary>
+    public static FileTypeFilter? AsOneFileType(params FileTypeFilter[]? types) => types switch
+    {
+        null or { Length: 0 } => null,
+        { Length: 1 } => types[0],
+
+        // Several filters are merged into a single flat "Custom" filter without children.
+        { } several => FileTypeFilter.Composite(TB("Custom"), OnlyAllowTypes(several)),
+    };
+
+    /// <summary>
+    /// Builds the lower-case, duplicate-free extension list of the given file types and their children.
+    /// The source-like leaves are skipped only when passed directly, because they hold file names.
+    /// </summary>
+    public static string[] OnlyAllowTypes(params FileTypeFilter[] types)
+    {
+        var extensionSources = types.Where(t => t != SOURCE_LIKE_FILE_NAMES && t != SOURCE_LIKE_FILE_NAME_PREFIXES);
+        var lowerCased = extensionSources.SelectMany(t => t.FlattenExtensions()).Select(ext => ext.ToLowerInvariant());
+
+        // An empty input simply yields an empty array.
+        return lowerCased.Distinct(StringComparer.OrdinalIgnoreCase).ToArray();
+    }
+
+    /// <summary>
+    /// Validates a file path against the provided filters.
+    /// Supports extension-based matching and source-like file names (e.g. Dockerfile).
+    /// </summary>
+    /// <param name="filePath">The path (or bare file name) to check.</param>
+    /// <param name="types">The allowed file types; null or empty allows nothing.</param>
+    /// <returns>True when the extension or the source-like file name matches.</returns>
+    public static bool IsAllowedPath(string filePath, params FileTypeFilter[]? types)
+    {
+        if (types == null || types.Length == 0 || string.IsNullOrWhiteSpace(filePath))
+            return false;
+
+        // Extension-based matching comes first.
+        var extension = Path.GetExtension(filePath).TrimStart('.');
+        if (!string.IsNullOrWhiteSpace(extension) && OnlyAllowTypes(types).Contains(extension, StringComparer.OrdinalIgnoreCase))
+            return true;
+
+        var fileName = Path.GetFileName(filePath);
+        if (string.IsNullOrWhiteSpace(fileName))
+            return false;
+
+        // Exact names are compared case-insensitively, exactly like the prefixes below.
+        if (types.Any(t => t.ContainsType(SOURCE_LIKE_FILE_NAMES))
+            && SOURCE_LIKE_FILE_NAMES.FilterExtensions.Contains(fileName, StringComparer.OrdinalIgnoreCase))
+            return true;
+
+        // Prefix matching covers variants such as "Dockerfile.dev".
+        if (types.Any(t => t.ContainsType(SOURCE_LIKE_FILE_NAME_PREFIXES))
+            && SOURCE_LIKE_FILE_NAME_PREFIXES.FilterExtensions.Any(prefix => fileName.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)))
+            return true;
+
+        return false;
+    }
+}
diff --git a/app/MindWork AI Studio/Tools/Rust/SaveFileOptions.cs b/app/MindWork AI Studio/Tools/Rust/SaveFileOptions.cs
index 107e581a7..f1300ac17 100644
--- a/app/MindWork AI Studio/Tools/Rust/SaveFileOptions.cs
+++ b/app/MindWork AI Studio/Tools/Rust/SaveFileOptions.cs
@@ -6,5 +6,5 @@ public class SaveFileOptions
public PreviousFile? PreviousFile { get; init; }
- public FileTypeFilter? Filter { get; init; }
+    public FileTypeFilter? Filter { get; init; } // the type that FileTypes.AsOneFileType returns
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Rust/SelectFileOptions.cs b/app/MindWork AI Studio/Tools/Rust/SelectFileOptions.cs
index 28d16809a..fac7d5f4e 100644
--- a/app/MindWork AI Studio/Tools/Rust/SelectFileOptions.cs
+++ b/app/MindWork AI Studio/Tools/Rust/SelectFileOptions.cs
@@ -6,5 +6,5 @@ public sealed class SelectFileOptions
public PreviousFile? PreviousFile { get; init; }
- public FileTypeFilter? Filter { get; init; }
+    public FileTypeFilter? Filter { get; init; } // the type that FileTypes.AsOneFileType returns
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs b/app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs
new file mode 100644
index 000000000..c0e491bf4
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs
@@ -0,0 +1,6 @@
+namespace AIStudio.Tools.Rust;
+
+public sealed class TokenCountInfo // presumably the JSON answer of the runtime's token-count endpoint — TODO confirm
+{
+    public int TokenCount { get; set; } // the token count carried by the answer
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs b/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs
new file mode 100644
index 000000000..c141ec746
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs
@@ -0,0 +1,3 @@
+namespace AIStudio.Tools.Rust;
+
+public readonly record struct TokenizerUploadResponse(int Success, string Response); // RustService.ValidateAndStoreTokenizer reports an unsuccessful HTTP status as Success = -1
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs b/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs
index 4a498b016..7e7c24286 100644
--- a/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs
+++ b/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs
@@ -17,13 +17,13 @@ public async Task SelectDirectory(string title, stri
return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions);
}
- public async Task SelectFile(string title, FileTypeFilter? filter = null, string? initialFile = null)
+ public async Task SelectFile(string title, FileTypeFilter[]? filter = null, string? initialFile = null)
{
var payload = new SelectFileOptions
{
Title = title,
PreviousFile = initialFile is null ? null : new (initialFile),
- Filter = filter
+ Filter = FileTypes.AsOneFileType(filter)
};
var result = await this.http.PostAsJsonAsync("/select/file", payload, this.jsonRustSerializerOptions);
@@ -36,13 +36,13 @@ public async Task SelectFile(string title, FileTypeFilter
return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions);
}
- public async Task SelectFiles(string title, FileTypeFilter? filter = null, string? initialFile = null)
+ public async Task SelectFiles(string title, FileTypeFilter[]? filter = null, string? initialFile = null)
{
var payload = new SelectFileOptions
{
Title = title,
PreviousFile = initialFile is null ? null : new (initialFile),
- Filter = filter
+ Filter = FileTypes.AsOneFileType(filter)
};
var result = await this.http.PostAsJsonAsync("/select/files", payload, this.jsonRustSerializerOptions);
@@ -63,13 +63,13 @@ public async Task SelectFiles(string title, FileTypeFilt
/// An optional initial file path to pre-fill in the dialog.
/// A object containing information about whether the user canceled the
/// operation and whether the select operation was successful.
- public async Task SaveFile(string title, FileTypeFilter? filter = null, string? initialFile = null)
+ public async Task SaveFile(string title, FileTypeFilter[]? filter = null, string? initialFile = null)
{
var payload = new SaveFileOptions
{
Title = title,
PreviousFile = initialFile is null ? null : new (initialFile),
- Filter = filter
+ Filter = FileTypes.AsOneFileType(filter)
};
var result = await this.http.PostAsJsonAsync("/save/file", payload, this.jsonRustSerializerOptions);
@@ -81,4 +81,21 @@ public async Task SaveFile(string title, FileTypeFilter? filte
return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions);
}
+
+    /// <summary>Posts the model id and file path to the runtime's /tokenizer/val-and-store endpoint.</summary>
+    public async Task<TokenizerUploadResponse> ValidateAndStoreTokenizer(string? modelId, string filePath)
+    {
+        var payload = new { model_id = modelId, file_path = filePath };
+        var result = await this.http.PostAsJsonAsync("/tokenizer/val-and-store", payload, this.jsonRustSerializerOptions);
+
+        if (!result.IsSuccessStatusCode)
+        {
+            // The logger may be unset (other members of this class null-check it), so it is never dereferenced blindly.
+            this.logger?.LogError("Failed to validate and store the tokenizer '{StatusCode}'", result.StatusCode);
+            return new TokenizerUploadResponse(-1, "An error occurred while validating and storing the tokenizer");
+        }
+
+        return await result.Content.ReadFromJsonAsync<TokenizerUploadResponse>(this.jsonRustSerializerOptions);
+    }
+
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs b/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs
new file mode 100644
index 000000000..e01272dbe
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs
@@ -0,0 +1,27 @@
+using AIStudio.Tools.Rust;
+
+namespace AIStudio.Tools.Services;
+
+public sealed partial class RustService
+{
+    /// <summary>Asks the runtime for the token count of the text; null on any error, including the 5 s timeout.</summary>
+    public async Task<TokenCountInfo?> GetTokenCount(string text)
+    {
+        try
+        {
+            using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5)); // disposed on exit to release the timeout timer
+            var response = await this.http.PostAsJsonAsync("/system/tokenizer/count", new { text }, this.jsonRustSerializerOptions, cts.Token);
+            response.EnsureSuccessStatusCode();
+            return await response.Content.ReadFromJsonAsync<TokenCountInfo>(this.jsonRustSerializerOptions, cancellationToken: cts.Token);
+        }
+        catch (Exception e)
+        {
+            if (this.logger is not null)
+                this.logger.LogError(e, "Error while getting token count from Rust service.");
+            else
+                Console.WriteLine($"Error while getting token count from Rust service: '{e}'.");
+
+            return null;
+        }
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Validation/FileExtensionValidation.cs b/app/MindWork AI Studio/Tools/Validation/FileExtensionValidation.cs
index 02a978d1a..d38a8c086 100644
--- a/app/MindWork AI Studio/Tools/Validation/FileExtensionValidation.cs
+++ b/app/MindWork AI Studio/Tools/Validation/FileExtensionValidation.cs
@@ -43,8 +43,7 @@ public enum UseCase
/// True if valid, false if invalid (error/warning already sent via MessageBus).
public static async Task IsExtensionValidWithNotifyAsync(UseCase useCae, string filePath, bool validateMediaFileTypes = true, Settings.Provider? provider = null)
{
- var ext = Path.GetExtension(filePath).TrimStart('.').ToLowerInvariant();
- if(FileTypeFilter.Executables.FilterExtensions.Contains(ext))
+ if (FileTypes.IsAllowedPath(filePath, FileTypes.EXECUTABLES))
{
await MessageBus.INSTANCE.SendError(new(
Icons.Material.Filled.AppBlocking,
@@ -53,7 +52,7 @@ await MessageBus.INSTANCE.SendError(new(
}
var capabilities = provider?.GetModelCapabilities() ?? new();
- if (FileTypeFilter.AllImages.FilterExtensions.Contains(ext))
+ if (FileTypes.IsAllowedPath(filePath, FileTypes.IMAGE))
{
switch (useCae)
{
@@ -88,7 +87,7 @@ await MessageBus.INSTANCE.SendWarning(new(
}
}
- if(FileTypeFilter.AllVideos.FilterExtensions.Contains(ext))
+ if (FileTypes.IsAllowedPath(filePath, FileTypes.VIDEO))
{
await MessageBus.INSTANCE.SendWarning(new(
Icons.Material.Filled.FeaturedVideo,
@@ -96,7 +95,7 @@ await MessageBus.INSTANCE.SendWarning(new(
return false;
}
- if(FileTypeFilter.AllAudio.FilterExtensions.Contains(ext))
+ if (FileTypes.IsAllowedPath(filePath, FileTypes.AUDIO))
{
await MessageBus.INSTANCE.SendWarning(new(
Icons.Material.Filled.AudioFile,
@@ -123,7 +122,7 @@ await MessageBus.INSTANCE.SendError(new(
return false;
}
- if (!Array.Exists(FileTypeFilter.AllImages.FilterExtensions, x => x.Equals(ext, StringComparison.OrdinalIgnoreCase)))
+        if (!FileTypes.IsAllowedPath(filePath, FileTypes.IMAGE)) // the error below is for files that are NOT images
{
await MessageBus.INSTANCE.SendError(new(
Icons.Material.Filled.ImageNotSupported,
diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs
index 1b13e0991..102efbe2e 100644
--- a/runtime/src/lib.rs
+++ b/runtime/src/lib.rs
@@ -17,4 +17,5 @@ pub mod qdrant;
pub mod certificate_factory;
pub mod runtime_api_token;
pub mod stale_process_cleanup;
-mod sidecar_types;
\ No newline at end of file
+mod sidecar_types;
+pub mod tokenizer;
\ No newline at end of file
diff --git a/runtime/src/main.rs b/runtime/src/main.rs
index 00a7ba905..a210de540 100644
--- a/runtime/src/main.rs
+++ b/runtime/src/main.rs
@@ -11,7 +11,7 @@ use mindwork_ai_studio::environment::is_dev;
use mindwork_ai_studio::log::init_logging;
use mindwork_ai_studio::metadata::MetaData;
use mindwork_ai_studio::runtime_api::start_runtime_api;
-
+use mindwork_ai_studio::tokenizer::{init_tokenizer};
#[tokio::main]
async fn main() {
@@ -43,8 +43,12 @@ async fn main() {
info!("Running in production mode.");
}
+ if let Err(e) = init_tokenizer() {
+ warn!(Source = "Tokenizer"; "Error during the initialisation of the tokenizer: {}", e);
+ }
+
generate_runtime_certificate();
start_runtime_api();
start_tauri();
-}
\ No newline at end of file
+}
diff --git a/runtime/src/runtime_api.rs b/runtime/src/runtime_api.rs
index 64bc8174a..6ceeb1e2b 100644
--- a/runtime/src/runtime_api.rs
+++ b/runtime/src/runtime_api.rs
@@ -89,6 +89,7 @@ pub fn start_runtime_api() {
crate::file_data::extract_data,
crate::log::get_log_paths,
crate::log::log_event,
+ crate::tokenizer::tokenizer_count,
crate::app_window::register_shortcut,
crate::app_window::validate_shortcut,
crate::app_window::suspend_shortcuts,
diff --git a/runtime/src/tokenizer.rs b/runtime/src/tokenizer.rs
new file mode 100644
index 000000000..3614b3968
--- /dev/null
+++ b/runtime/src/tokenizer.rs
@@ -0,0 +1,54 @@
+use std::fs;
+use std::path::{PathBuf};
+use std::sync::OnceLock;
+use rocket::{post};
+use rocket::serde::json::Json;
+use rocket::serde::Serialize;
+use serde::Deserialize;
+use tokenizers::Error;
+use tokenizers::tokenizer::Tokenizer;
+use crate::api_token::APIToken;
+
+static TOKENIZER: OnceLock<Tokenizer> = OnceLock::new(); // Written once by `init_tokenizer`.
+
+static TEXT: &str = ""; // Replacement used by `get_token_count` when its input is empty.
+
+/// Loads `target/tokenizers/tokenizer.json` (relative to the working directory) into `TOKENIZER`.
+///
+/// Returns an error if the directory cannot be created, the file cannot be loaded, or a tokenizer is already set.
+pub fn init_tokenizer() -> Result<(), Error> {
+    let tokenizer_dir = PathBuf::from("target").join("tokenizers");
+    fs::create_dir_all(&tokenizer_dir)?;
+
+    let tokenizer = Tokenizer::from_file(tokenizer_dir.join("tokenizer.json"))?;
+    // A second initialisation is reported to the caller instead of panicking.
+    TOKENIZER.set(tokenizer).map_err(|_| "The tokenizer was already initialised.".into())
+}
+
+/// Counts the tokens in `text` (`TEXT` replaces an empty input); 0 if no tokenizer is loaded or encoding fails.
+pub fn get_token_count(text: &str) -> usize {
+    let text = if text.is_empty() { TEXT } else { text };
+    // `init_tokenizer` can fail at start-up, so the tokenizer may be absent here.
+    let Some(tokenizer) = TOKENIZER.get() else {
+        return 0;
+    };
+    tokenizer.encode(text, true).map_or(0, |encoding| encoding.len())
+}
+
+/// Request body of the token-count endpoint: the text whose tokens are counted.
+#[derive(Deserialize)]
+pub struct SetTokenText {
+    pub text: String,
+}
+
+/// Response body of the token-count endpoint.
+#[derive(Serialize)]
+pub struct GetTokenCount {
+    token_count: usize,
+}
+/// Runtime API endpoint: answers with the token count of the text in the JSON request body.
+#[post("/system/tokenizer/count", data = "<req>")]
+pub fn tokenizer_count(_token: APIToken, req: Json<SetTokenText>) -> Json<GetTokenCount> {
+    let token_count = get_token_count(&req.text);
+    Json(GetTokenCount { token_count })
+}
\ No newline at end of file