diff --git a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs index cd299f698..615355e05 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs @@ -6,19 +6,31 @@ public class TokenStatsModel public string Model { get; set; } public string Prompt { get; set; } - #region Input + #region Text token public int TextInputTokens { get; set; } public int CachedTextInputTokens { get; set; } + public int TextOutputTokens { get; set; } + #endregion + + #region Audio token public int AudioInputTokens { get; set; } public int CachedAudioInputTokens { get; set; } + public int AudioOutputTokens { get; set; } #endregion - #region Output - public int TextOutputTokens { get; set; } - public int AudioOutputTokens { get; set; } + #region Image token + public int ImageInputTokens { get; set; } + public int CachedImageInputTokens { get; set; } + public int ImageOutputTokens { get; set; } #endregion + #region Image + public int ImageGenerationCount { get; set; } + public float ImageGenerationUnitCost { get; set; } + #endregion - public int TotalInputTokens => TextInputTokens + CachedTextInputTokens + AudioInputTokens + CachedAudioInputTokens; - public int TotalOutputTokens => TextOutputTokens + AudioOutputTokens; + public int TotalInputTokens => TextInputTokens + CachedTextInputTokens + + AudioInputTokens + CachedAudioInputTokens + + ImageInputTokens + CachedImageInputTokens; + public int TotalOutputTokens => TextOutputTokens + AudioOutputTokens + ImageOutputTokens; } diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/Proccessors/IFileProcessor.cs b/src/Infrastructure/BotSharp.Abstraction/Files/Proccessors/IFileProcessor.cs index fbcced77a..19ab437c9 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Files/Proccessors/IFileProcessor.cs +++ 
b/src/Infrastructure/BotSharp.Abstraction/Files/Proccessors/IFileProcessor.cs @@ -1,7 +1,5 @@ using BotSharp.Abstraction.Files.Options; using BotSharp.Abstraction.Files.Responses; -using BotSharp.Abstraction.Knowledges.Options; -using BotSharp.Abstraction.Knowledges.Responses; namespace BotSharp.Abstraction.Files.Proccessors; @@ -11,7 +9,4 @@ public interface IFileProcessor Task HandleFilesAsync(Agent agent, string text, IEnumerable files, FileHandleOptions? options = null) => throw new NotImplementedException(); - - Task GetFileKnowledgeAsync(FileBinaryDataModel file, FileKnowledgeHandleOptions? options = null) - => throw new NotImplementedException(); } diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs deleted file mode 100644 index 9238e3220..000000000 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace BotSharp.Abstraction.Knowledges; - -public interface IPhraseCollection -{ - Task>> LoadVocabularyAsync(); - Task> LoadSynonymMappingAsync(); -} diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs deleted file mode 100644 index 1ca84024a..000000000 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace BotSharp.Abstraction.Knowledges; - -public interface IPhraseService -{ - Task> SearchPhrasesAsync(string term); -} \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/SearchPhrasesResult.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/SearchPhrasesResult.cs deleted file mode 100644 index 64dc0c18f..000000000 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/SearchPhrasesResult.cs +++ /dev/null @@ -1,11 +0,0 @@ - -namespace 
BotSharp.Abstraction.Knowledges.Models; - -public class SearchPhrasesResult -{ - public string Token { get; set; } = string.Empty; - public List Sources { get; set; } = new(); - public string CanonicalForm { get; set; } = string.Empty; - public string MatchType { get; set; } = string.Empty; - public double Confidence { get; set; } -} \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Processors/IKnowledgeProcessor.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Processors/IKnowledgeProcessor.cs new file mode 100644 index 000000000..4a7f2b5ba --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Processors/IKnowledgeProcessor.cs @@ -0,0 +1,12 @@ +using BotSharp.Abstraction.Knowledges.Options; +using BotSharp.Abstraction.Knowledges.Responses; + +namespace BotSharp.Abstraction.Knowledges.Processors; + +public interface IKnowledgeProcessor +{ + public string Provider { get; } + + Task GetFileKnowledgeAsync(FileBinaryDataModel file, FileKnowledgeHandleOptions? options = null) + => throw new NotImplementedException(); +} diff --git a/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs b/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs index 77bd46b28..33e7a53f4 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs @@ -106,27 +106,37 @@ public class ImageSetting public ImageVariationSetting? Variation { get; set; } } +/// +/// Image generation setting +/// public class ImageGenerationSetting { - public ModelSettingBase? Style { get; set; } - public ModelSettingBase? Size { get; set; } - public ModelSettingBase? Quality { get; set; } - public ModelSettingBase? ResponseFormat { get; set; } - public ModelSettingBase? Background { get; set; } + /// + /// Parameters: quality, size, style, background, response format + /// + public Dictionary? 
Parameters { get; set; } } +/// +/// Image editing setting +/// public class ImageEditSetting { - public ModelSettingBase? Size { get; set; } - public ModelSettingBase? Quality { get; set; } - public ModelSettingBase? ResponseFormat { get; set; } - public ModelSettingBase? Background { get; set; } + /// + /// Parameters: quality, size, background, response format + /// + public Dictionary? Parameters { get; set; } } +/// +/// Image variation setting +/// public class ImageVariationSetting { - public ModelSettingBase? Size { get; set; } - public ModelSettingBase? ResponseFormat { get; set; } + /// + /// Parameters: size, response format + /// + public Dictionary? Parameters { get; set; } } #endregion @@ -136,15 +146,21 @@ public class AudioSetting public AudioTranscriptionSetting? Transcription { get; set; } } +/// +/// Audio transcription setting +/// public class AudioTranscriptionSetting { public float? Temperature { get; set; } - public ModelSettingBase? ResponseFormat { get; set; } - public ModelSettingBase? Granularity { get; set; } + + /// + /// Parameters: granularity, response format + /// + public Dictionary? Parameters { get; set; } } #endregion -public class ModelSettingBase +public class ModelParamSetting { public string? Default { get; set; } public IEnumerable? 
Options { get; set; } @@ -156,15 +172,36 @@ public class ModelSettingBase /// public class LlmCostSetting { - // Input + #region Text token public float TextInputCost { get; set; } = 0f; public float CachedTextInputCost { get; set; } = 0f; + public float TextOutputCost { get; set; } = 0f; + #endregion + + #region Audio token public float AudioInputCost { get; set; } = 0f; public float CachedAudioInputCost { get; set; } = 0f; - - // Output - public float TextOutputCost { get; set; } = 0f; public float AudioOutputCost { get; set; } = 0f; + #endregion + + #region Image token + public float ImageInputCost { get; set; } = 0f; + public float CachedImageInputCost { get; set; } = 0f; + public float ImageOutputCost { get; set; } = 0f; + #endregion + + #region Image + public IList? ImageCosts { get; set; } + #endregion +} + +public class LlmImageCost +{ + /// + /// Attributes: e.g., [quality]: "medium", [size] = "1024x1024" + /// + public Dictionary Attributes { get; set; } = []; + public float Cost { get; set; } = 0f; } public enum LlmModelType diff --git a/src/Infrastructure/BotSharp.Abstraction/MLTasks/Utilities/LlmUtility.cs b/src/Infrastructure/BotSharp.Abstraction/MLTasks/Utilities/LlmUtility.cs index 0303f6e25..6c6d7b33f 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MLTasks/Utilities/LlmUtility.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MLTasks/Utilities/LlmUtility.cs @@ -1,7 +1,23 @@ +using BotSharp.Abstraction.MLTasks.Settings; + namespace BotSharp.Abstraction.MLTasks.Utilities; public static class LlmUtility { + public static string? GetModelParameter(IDictionary? settings, string key, string curVal) + { + string? res = null; + + if (settings != null + && settings.TryGetValue(key, out var value) + && value != null) + { + res = VerifyModelParameter(curVal, value.Default, value.Options); + } + + return res; + } + public static string? VerifyModelParameter(string? curVal, string? defaultVal, IEnumerable? 
options = null) { if (options.IsNullOrEmpty()) @@ -9,6 +25,6 @@ public static class LlmUtility return curVal.IfNullOrEmptyAs(defaultVal); } - return options.Contains(curVal) ? curVal : defaultVal; + return options!.Contains(curVal) ? curVal : defaultVal; } } diff --git a/src/Infrastructure/BotSharp.Abstraction/Statistics/Models/BotSharpStats.cs b/src/Infrastructure/BotSharp.Abstraction/Statistics/Models/BotSharpStats.cs index 6b3184fb6..68467fc31 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Statistics/Models/BotSharpStats.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Statistics/Models/BotSharpStats.cs @@ -80,6 +80,9 @@ public class StatsCount { [JsonPropertyName("agent_call_count")] public long AgentCallCount { get; set; } + + [JsonPropertyName("image_generation_total_count")] + public int ImageGenerationTotalCount { get; set; } } public class StatsLlmCost @@ -95,4 +98,7 @@ public class StatsLlmCost [JsonPropertyName("completion_total_cost")] public float CompletionTotalCost { get; set; } + + [JsonPropertyName("image_generation_total_cost")] + public float ImageGenerationTotalCost { get; set; } } \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Abstraction/Statistics/Models/BotSharpStatsDelta.cs b/src/Infrastructure/BotSharp.Abstraction/Statistics/Models/BotSharpStatsDelta.cs index 7409c37f5..6f678df22 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Statistics/Models/BotSharpStatsDelta.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Statistics/Models/BotSharpStatsDelta.cs @@ -29,6 +29,7 @@ public string Interval public class StatsCountDelta { public int AgentCallCountDelta { get; set; } + public int ImageGenerationTotalCountDelta { get; set; } } public class StatsLlmCostDelta @@ -37,4 +38,5 @@ public class StatsLlmCostDelta public int CompletionTokensDelta { get; set; } public float PromptTotalCostDelta { get; set; } public float CompletionTotalCostDelta { get; set; } + public float ImageGenerationTotalCostDelta { get; set; } } 
diff --git a/src/Infrastructure/BotSharp.Abstraction/Tokenizers/ITokenDataLoader.cs b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/ITokenDataLoader.cs new file mode 100644 index 000000000..745dfd914 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/ITokenDataLoader.cs @@ -0,0 +1,18 @@ +namespace BotSharp.Abstraction.Tokenizers; + +public interface ITokenDataLoader +{ + string Provider { get; } + + /// + /// Load vocabulary data => return mapping: [data source] = a list of vocabularies + /// + /// + Task>> LoadVocabularyAsync(); + + /// + /// Load synonym data => return mapping: [word/phrase] = (data source, canonical form) + /// + /// + Task> LoadSynonymMappingAsync(); +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Tokenizers/ITokenizer.cs b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/ITokenizer.cs new file mode 100644 index 000000000..b13663899 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/ITokenizer.cs @@ -0,0 +1,11 @@ +using BotSharp.Abstraction.Tokenizers.Models; +using BotSharp.Abstraction.Tokenizers.Responses; + +namespace BotSharp.Abstraction.Tokenizers; + +public interface ITokenizer +{ + string Provider { get; } + + Task TokenizeAsync(string text, TokenizeOptions? options = null); +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Models/TokenizeOptions.cs b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Models/TokenizeOptions.cs new file mode 100644 index 000000000..1f1c70f25 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Models/TokenizeOptions.cs @@ -0,0 +1,32 @@ +namespace BotSharp.Abstraction.Tokenizers.Models; + +public class TokenizeOptions +{ + /// + /// Token data providers + /// + [JsonPropertyName("data_providers")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public List? 
DataProviders { get; set; } + + /// + /// Maximum n-gram size + /// + [JsonPropertyName("max_ngram")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public int? MaxNgram { get; set; } + + /// + /// Cutoff score: from 0 to 1 + /// + [JsonPropertyName("cutoff")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public double? Cutoff { get; set; } + + /// + /// Top k + /// + [JsonPropertyName("top_k")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public int? TopK { get; set; } +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Models/TokenizeResult.cs b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Models/TokenizeResult.cs new file mode 100644 index 000000000..a02a716d2 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Models/TokenizeResult.cs @@ -0,0 +1,12 @@ +namespace BotSharp.Abstraction.Tokenizers.Models; + +public class TokenizeResult +{ + public string Token { get; set; } = string.Empty; + + [JsonPropertyName("canonical_text")] + public string? 
CanonicalText { get; set; } + + [JsonPropertyName("data")] + public IDictionary Data { get; set; } = new Dictionary(); +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Responses/TokenizeResponse.cs b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Responses/TokenizeResponse.cs new file mode 100644 index 000000000..e01fe153f --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Tokenizers/Responses/TokenizeResponse.cs @@ -0,0 +1,8 @@ +using BotSharp.Abstraction.Tokenizers.Models; + +namespace BotSharp.Abstraction.Tokenizers.Responses; + +public class TokenizeResponse : ResponseBase +{ + public List Results { get; set; } = []; +} diff --git a/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs b/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs index 97bad631d..a92c4c8f3 100644 --- a/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs +++ b/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs @@ -1,4 +1,3 @@ -using BotSharp.Abstraction.Conversations.Enums; using BotSharp.Abstraction.MLTasks; using System.Diagnostics; @@ -41,18 +40,35 @@ public void AddToken(TokenStatsModel stats, RoleDialogModel message) var settingsService = _services.GetRequiredService(); var settings = settingsService.GetSetting(stats.Provider, _model); - var deltaTextInputCost = stats.TextInputTokens / 1000f * (settings.Cost?.TextInputCost ?? 0f); - var deltaCachedTextInputCost = stats.CachedTextInputTokens / 1000f * (settings.Cost?.CachedTextInputCost ?? 0f); - var deltaAudioInputCost = stats.AudioInputTokens / 1000f * (settings.Cost?.AudioInputCost ?? 0f); - var deltaCachedAudioInputCost = stats.CachedAudioInputTokens / 1000f * (settings.Cost?.CachedAudioInputCost ?? 
0f); + #region Text tokens + var deltaTextInputCost = GetDeltaCost(stats.TextInputTokens, settings?.Cost?.TextInputCost); + var deltaCachedTextInputCost = GetDeltaCost(stats.CachedTextInputTokens, settings?.Cost?.CachedTextInputCost); + var deltaTextOutputCost = GetDeltaCost(stats.TextOutputTokens, settings?.Cost?.TextOutputCost); + #endregion - var deltaTextOutputCost = stats.TextOutputTokens / 1000f * (settings.Cost?.TextOutputCost ?? 0f); - var deltaAudioOutputCost = stats.AudioOutputTokens / 1000f * (settings.Cost?.AudioOutputCost ?? 0f); + #region Audio tokens + var deltaAudioInputCost = GetDeltaCost(stats.AudioInputTokens, settings?.Cost?.AudioInputCost); + var deltaCachedAudioInputCost = GetDeltaCost(stats.CachedAudioInputTokens, settings?.Cost?.CachedAudioInputCost); + var deltaAudioOutputCost = GetDeltaCost(stats.AudioOutputTokens, settings?.Cost?.AudioOutputCost); + #endregion - var deltaPromptCost = deltaTextInputCost + deltaCachedTextInputCost + deltaAudioInputCost + deltaCachedAudioInputCost; - var deltaCompletionCost = deltaTextOutputCost + deltaAudioOutputCost; + #region Image tokens + var deltaImageInputCost = GetDeltaCost(stats.ImageInputTokens, settings?.Cost?.ImageInputCost); + var deltaCachedImageInputCost = GetDeltaCost(stats.CachedImageInputTokens, settings?.Cost?.CachedImageInputCost); + var deltaImageOutputCost = GetDeltaCost(stats.ImageOutputTokens, settings?.Cost?.ImageOutputCost); + #endregion - var deltaTotal = deltaPromptCost + deltaCompletionCost; + #region Image generation + var deltaImageGenerationCost = stats.ImageGenerationCount * stats.ImageGenerationUnitCost; + #endregion + + + var deltaPromptCost = deltaTextInputCost + deltaCachedTextInputCost + + deltaAudioInputCost + deltaCachedAudioInputCost + + deltaImageInputCost + deltaCachedImageInputCost; + var deltaCompletionCost = deltaTextOutputCost + deltaAudioOutputCost + deltaImageOutputCost; + + var deltaTotal = deltaPromptCost + deltaCompletionCost + deltaImageGenerationCost; 
_promptCost += deltaPromptCost; _completionCost += deltaCompletionCost; @@ -78,12 +94,18 @@ public void AddToken(TokenStatsModel stats, RoleDialogModel message) AgentId = agentId, RecordTime = DateTime.UtcNow, IntervalType = StatsInterval.Day, + CountDelta = new() + { + AgentCallCountDelta = 1, + ImageGenerationTotalCountDelta = stats.ImageGenerationCount + }, LlmCostDelta = new() { PromptTokensDelta = stats.TotalInputTokens, CompletionTokensDelta = stats.TotalOutputTokens, PromptTotalCostDelta = deltaPromptCost, - CompletionTotalCostDelta = deltaCompletionCost + CompletionTotalCostDelta = deltaCompletionCost, + ImageGenerationTotalCostDelta = deltaImageGenerationCost } }; globalStats.UpdateStats($"global-{metric}-{dim}-{agentId}", delta); @@ -127,4 +149,9 @@ public void StopTimer() } _timer.Stop(); } + + private float GetDeltaCost(int tokens, float? unitCost) + { + return tokens / 1000f * (unitCost ?? 0f); + } } diff --git a/src/Infrastructure/BotSharp.Core/Infrastructures/PollyExtensions.cs b/src/Infrastructure/BotSharp.Core/Infrastructures/PollyExtensions.cs index b2b74b74a..1487a8a94 100644 --- a/src/Infrastructure/BotSharp.Core/Infrastructures/PollyExtensions.cs +++ b/src/Infrastructure/BotSharp.Core/Infrastructures/PollyExtensions.cs @@ -1,4 +1,3 @@ -using Microsoft.Extensions.Logging; using Polly; using Polly.Retry; using StackExchange.Redis; diff --git a/src/Infrastructure/BotSharp.Logger/BotSharpLoggerExtensions.cs b/src/Infrastructure/BotSharp.Logger/BotSharpLoggerExtensions.cs index 0404ceb46..0e5751e4b 100644 --- a/src/Infrastructure/BotSharp.Logger/BotSharpLoggerExtensions.cs +++ b/src/Infrastructure/BotSharp.Logger/BotSharpLoggerExtensions.cs @@ -13,7 +13,6 @@ public static IServiceCollection AddBotSharpLogger(this IServiceCollection servi services.AddScoped(); services.AddScoped(); services.AddScoped(); - services.AddScoped(); services.AddScoped(); services.AddScoped(); return services; diff --git 
a/src/Infrastructure/BotSharp.Logger/Hooks/CommonContentGeneratingHook.cs b/src/Infrastructure/BotSharp.Logger/Hooks/CommonContentGeneratingHook.cs index 25f08c22a..f30559046 100644 --- a/src/Infrastructure/BotSharp.Logger/Hooks/CommonContentGeneratingHook.cs +++ b/src/Infrastructure/BotSharp.Logger/Hooks/CommonContentGeneratingHook.cs @@ -18,14 +18,20 @@ public async Task AfterGenerated(RoleDialogModel message, TokenStatsModel tokenS private void SaveLlmCompletionLog(RoleDialogModel message, TokenStatsModel tokenStats) { var convSettings = _services.GetRequiredService(); - if (!convSettings.EnableLlmCompletionLog) return; + var conv = _services.GetRequiredService(); - var db = _services.GetRequiredService(); - var state = _services.GetRequiredService(); + if (!convSettings.EnableLlmCompletionLog + || string.IsNullOrEmpty(conv.ConversationId) + || string.IsNullOrWhiteSpace(tokenStats.Prompt) + || string.IsNullOrWhiteSpace(message.Content)) + { + return; + } + var db = _services.GetRequiredService(); var completionLog = new LlmCompletionLog { - ConversationId = state.GetConversationId(), + ConversationId = conv.ConversationId, MessageId = message.MessageId, AgentId = message.CurrentAgentId, Prompt = tokenStats.Prompt, diff --git a/src/Infrastructure/BotSharp.Logger/Hooks/GlobalStatsConversationHook.cs b/src/Infrastructure/BotSharp.Logger/Hooks/GlobalStatsConversationHook.cs deleted file mode 100644 index 5db5ec7cc..000000000 --- a/src/Infrastructure/BotSharp.Logger/Hooks/GlobalStatsConversationHook.cs +++ /dev/null @@ -1,43 +0,0 @@ -using BotSharp.Abstraction.Statistics.Enums; -using BotSharp.Abstraction.Statistics.Models; -using BotSharp.Abstraction.Statistics.Services; - -namespace BotSharp.Logger.Hooks; - -public class GlobalStatsConversationHook : IContentGeneratingHook -{ - private readonly IServiceProvider _services; - - public GlobalStatsConversationHook( - IServiceProvider services) - { - _services = services; - } - - public async Task 
AfterGenerated(RoleDialogModel message, TokenStatsModel tokenStats) - { - UpdateAgentCall(message); - await Task.CompletedTask; - } - - private void UpdateAgentCall(RoleDialogModel message) - { - // record agent call - var globalStats = _services.GetRequiredService(); - - var metric = StatsMetric.AgentCall; - var dim = "agent"; - var agentId = message.CurrentAgentId ?? string.Empty; - var delta = new BotSharpStatsDelta - { - AgentId = agentId, - RecordTime = DateTime.UtcNow, - IntervalType = StatsInterval.Day, - CountDelta = new() - { - AgentCallCountDelta = 1 - } - }; - globalStats.UpdateStats($"global-{metric}-{dim}-{agentId}", delta); - } -} diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.Document.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.Document.cs new file mode 100644 index 000000000..7411e98b9 --- /dev/null +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.Document.cs @@ -0,0 +1,87 @@ +using BotSharp.Abstraction.Files.Utilities; +using BotSharp.Abstraction.Knowledges.Options; +using BotSharp.Abstraction.Knowledges.Processors; +using BotSharp.Abstraction.Knowledges.Responses; +using BotSharp.OpenAPI.ViewModels.Knowledges; + +namespace BotSharp.OpenAPI.Controllers; + +public partial class KnowledgeBaseController +{ + #region Document + [HttpGet("/knowledge/document/processors")] + public IEnumerable GetKnowledgeDocumentProcessors() + { + return _services.GetServices().Select(x => x.Provider); + } + + [HttpPost("/knowledge/document/{collection}/upload")] + public async Task UploadKnowledgeDocuments([FromRoute] string collection, [FromBody] VectorKnowledgeUploadRequest request) + { + var response = await _knowledgeService.UploadDocumentsToKnowledge(collection, request.Files, request.Options); + return response; + } + + [HttpPost("/knowledge/document/{collection}/form")] + public async Task UploadKnowledgeDocuments( + 
[FromRoute] string collection, + [FromForm] IEnumerable files, + [FromForm] KnowledgeDocOptions? options = null) + { + if (files.IsNullOrEmpty()) + { + return new UploadKnowledgeResponse(); + } + + var docs = new List(); + foreach (var file in files) + { + var data = FileUtility.BuildFileDataFromFile(file); + docs.Add(new ExternalFileModel + { + FileName = file.FileName, + FileData = data + }); + } + + var response = await _knowledgeService.UploadDocumentsToKnowledge(collection, docs, options); + return response; + } + + [HttpDelete("/knowledge/document/{collection}/delete/{fileId}")] + public async Task DeleteKnowledgeDocument([FromRoute] string collection, [FromRoute] Guid fileId) + { + var response = await _knowledgeService.DeleteKnowledgeDocument(collection, fileId); + return response; + } + + [HttpDelete("/knowledge/document/{collection}/delete")] + public async Task DeleteKnowledgeDocuments([FromRoute] string collection, [FromBody] GetKnowledgeDocsRequest request) + { + var response = await _knowledgeService.DeleteKnowledgeDocuments(collection, request); + return response; + } + + [HttpPost("/knowledge/document/{collection}/page")] + public async Task> GetPagedKnowledgeDocuments([FromRoute] string collection, [FromBody] GetKnowledgeDocsRequest request) + { + var data = await _knowledgeService.GetPagedKnowledgeDocuments(collection, request); + + return new PagedItems + { + Items = data.Items.Select(x => KnowledgeFileViewModel.From(x)), + Count = data.Count + }; + } + + [HttpGet("/knowledge/document/{collection}/file/{fileId}")] + public async Task GetKnowledgeDocument([FromRoute] string collection, [FromRoute] Guid fileId) + { + var file = await _knowledgeService.GetKnowledgeDocumentBinaryData(collection, fileId); + var stream = file.FileBinaryData.ToStream(); + stream.Position = 0; + + return new FileStreamResult(stream, file.ContentType) { FileDownloadName = file.FileName }; + } + #endregion +} diff --git 
a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.Tokenizer.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.Tokenizer.cs new file mode 100644 index 000000000..08f7812fc --- /dev/null +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.Tokenizer.cs @@ -0,0 +1,48 @@ +using BotSharp.Abstraction.Tokenizers; +using BotSharp.Abstraction.Tokenizers.Responses; +using BotSharp.OpenAPI.ViewModels.Knowledges; + +namespace BotSharp.OpenAPI.Controllers; + +public partial class KnowledgeBaseController +{ + /// + /// Tokenize text with options + /// + /// + /// + [HttpPost("knowledge/tokenize")] + public async Task Tokenize([FromBody] TokenizeRequest request) + { + var tokenizer = _services.GetServices() + .FirstOrDefault(x => x.Provider.IsEqualTo(request.Provider)); + + if (tokenizer == null) + { + return null; + } + return await tokenizer.TokenizeAsync(request.Text, request.Options); + } + + /// + /// Get tokenizer providers + /// + /// + [HttpGet("knowledge/tokenizer/providers")] + public IEnumerable GetTokenizerProviders() + { + var tokenizers = _services.GetServices(); + return tokenizers.Select(x => x.Provider); + } + + /// + /// Get token data loader providers + /// + /// + [HttpGet("knowledge/tokenizer/data-providers")] + public IEnumerable GetTokenizerDataProviders() + { + var dataLoaders = _services.GetServices(); + return dataLoaders.Select(x => x.Provider); + } +} diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.cs index 3a9a9e732..2618f79af 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBase/KnowledgeBaseController.cs @@ -1,17 +1,14 @@ using BotSharp.Abstraction.Files.Utilities; using 
BotSharp.Abstraction.Graph.Options; -using BotSharp.Abstraction.Knowledges.Options; -using BotSharp.Abstraction.Knowledges.Responses; using BotSharp.Abstraction.VectorStorage.Models; using BotSharp.Abstraction.VectorStorage.Options; using BotSharp.OpenAPI.ViewModels.Knowledges; -using BotSharp.OpenAPI.ViewModels.Knowledges.Request; namespace BotSharp.OpenAPI.Controllers; [Authorize] [ApiController] -public class KnowledgeBaseController : ControllerBase +public partial class KnowledgeBaseController : ControllerBase { private readonly IKnowledgeService _knowledgeService; private readonly IServiceProvider _services; @@ -203,78 +200,6 @@ public async Task DeleteVectorCollectionSnapshots([FromRoute] string colle #endregion - #region Document - [HttpPost("/knowledge/document/{collection}/upload")] - public async Task UploadKnowledgeDocuments([FromRoute] string collection, [FromBody] VectorKnowledgeUploadRequest request) - { - var response = await _knowledgeService.UploadDocumentsToKnowledge(collection, request.Files, request.Options); - return response; - } - - [HttpPost("/knowledge/document/{collection}/form")] - public async Task UploadKnowledgeDocuments( - [FromRoute] string collection, - [FromForm] IEnumerable files, - [FromForm] KnowledgeDocOptions? 
options = null) - { - if (files.IsNullOrEmpty()) - { - return new UploadKnowledgeResponse(); - } - - var docs = new List(); - foreach (var file in files) - { - var data = FileUtility.BuildFileDataFromFile(file); - docs.Add(new ExternalFileModel - { - FileName = file.FileName, - FileData = data - }); - } - - var response = await _knowledgeService.UploadDocumentsToKnowledge(collection, docs, options); - return response; - } - - [HttpDelete("/knowledge/document/{collection}/delete/{fileId}")] - public async Task DeleteKnowledgeDocument([FromRoute] string collection, [FromRoute] Guid fileId) - { - var response = await _knowledgeService.DeleteKnowledgeDocument(collection, fileId); - return response; - } - - [HttpDelete("/knowledge/document/{collection}/delete")] - public async Task DeleteKnowledgeDocuments([FromRoute] string collection, [FromBody] GetKnowledgeDocsRequest request) - { - var response = await _knowledgeService.DeleteKnowledgeDocuments(collection, request); - return response; - } - - [HttpPost("/knowledge/document/{collection}/page")] - public async Task> GetPagedKnowledgeDocuments([FromRoute] string collection, [FromBody] GetKnowledgeDocsRequest request) - { - var data = await _knowledgeService.GetPagedKnowledgeDocuments(collection, request); - - return new PagedItems - { - Items = data.Items.Select(x => KnowledgeFileViewModel.From(x)), - Count = data.Count - }; - } - - [HttpGet("/knowledge/document/{collection}/file/{fileId}")] - public async Task GetKnowledgeDocument([FromRoute] string collection, [FromRoute] Guid fileId) - { - var file = await _knowledgeService.GetKnowledgeDocumentBinaryData(collection, fileId); - var stream = file.FileBinaryData.ToStream(); - stream.Position = 0; - - return new FileStreamResult(stream, file.ContentType) { FileDownloadName = file.FileName }; - } - #endregion - - #region Graph [HttpPost("/knowledge/graph/search")] public async Task SearchGraphKnowledge([FromBody] SearchGraphKnowledgeRequest request) diff --git 
a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/QueryVectorDataRequest.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/QueryVectorDataRequest.cs index b3a3841bb..331bb01f9 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/QueryVectorDataRequest.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/QueryVectorDataRequest.cs @@ -1,4 +1,4 @@ -namespace BotSharp.OpenAPI.ViewModels.Knowledges.Request; +namespace BotSharp.OpenAPI.ViewModels.Knowledges; public class QueryVectorDataRequest { diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/TokenizeRequest.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/TokenizeRequest.cs new file mode 100644 index 000000000..125013d0d --- /dev/null +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/TokenizeRequest.cs @@ -0,0 +1,10 @@ +using BotSharp.Abstraction.Tokenizers.Models; + +namespace BotSharp.OpenAPI.ViewModels.Knowledges; + +public class TokenizeRequest +{ + public string Text { get; set; } = string.Empty; + public string? Provider { get; set; } + public TokenizeOptions? 
Options { get; set; } +} diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/VectorCollectionIndexRequest.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/VectorCollectionIndexRequest.cs index 94e66ff03..5ff787fa6 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/VectorCollectionIndexRequest.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/Request/VectorCollectionIndexRequest.cs @@ -1,6 +1,6 @@ using BotSharp.Abstraction.VectorStorage.Options; -namespace BotSharp.OpenAPI.ViewModels.Knowledges.Request; +namespace BotSharp.OpenAPI.ViewModels.Knowledges; public class CreateVectorCollectionIndexRequest { diff --git a/src/Plugins/BotSharp.Plugin.ChatHub/Hooks/StreamingLogHook.cs b/src/Plugins/BotSharp.Plugin.ChatHub/Hooks/StreamingLogHook.cs index 321deed56..31e6056dd 100644 --- a/src/Plugins/BotSharp.Plugin.ChatHub/Hooks/StreamingLogHook.cs +++ b/src/Plugins/BotSharp.Plugin.ChatHub/Hooks/StreamingLogHook.cs @@ -196,14 +196,14 @@ public async Task AfterGenerated(RoleDialogModel message, TokenStatsModel tokenS var agent = await _agentService.GetAgent(message.CurrentAgentId); - var log = tokenStats.Prompt; + if (string.IsNullOrWhiteSpace(tokenStats.Prompt)) return; var input = new ContentLogInputModel(conversationId, message) { Name = agent?.Name, AgentId = agent?.Id, Source = ContentLogSource.Prompt, - Log = log + Log = tokenStats.Prompt }; await SendEvent(ChatEvent.OnConversationContentLogGenerated, conversationId, BuildContentLog(input)); } diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj b/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj index 8561dc204..c835381b1 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj @@ -13,9 +13,14 @@ + + + + + \ No newline at end of file diff --git 
a/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs index f46b3abf7..e7741c32d 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs @@ -1,4 +1,3 @@ - namespace BotSharp.Plugin.FuzzySharp.Constants; public static class MatchReason @@ -6,15 +5,15 @@ public static class MatchReason /// /// Token matched a synonym term (e.g., HVAC -> Air Conditioning/Heating) /// - public const string SynonymMatch = "synonym_match"; + public static MatchPriority SynonymMatch = new(3, "synonym_match"); /// /// Token exactly matched a vocabulary entry /// - public const string ExactMatch = "exact_match"; + public static MatchPriority ExactMatch = new(2, "exact_match"); /// /// Token was flagged as a potential typo and a correction was suggested /// - public const string TypoCorrection = "typo_correction"; + public static MatchPriority FuzzyMatch = new(1, "fuzzy_match"); } diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs index a8c749d13..073941f90 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs @@ -1,4 +1,3 @@ - namespace BotSharp.Plugin.FuzzySharp.Constants; public static class TextConstants diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs deleted file mode 100644 index bd1288b92..000000000 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs +++ /dev/null @@ -1,59 +0,0 @@ -using BotSharp.Abstraction.Knowledges; -using BotSharp.Abstraction.Knowledges.Models; -using Microsoft.AspNetCore.Http; -using Microsoft.AspNetCore.Mvc; -using Microsoft.Extensions.Logging; - -namespace 
BotSharp.Plugin.FuzzySharp.Controllers; - -[ApiController] -public class FuzzySharpController : ControllerBase -{ - private readonly IPhraseService _phraseService; - private readonly ILogger _logger; - - public FuzzySharpController( - IPhraseService phraseService, - ILogger logger) - { - _phraseService = phraseService; - _logger = logger; - } - - /// - /// Analyze text for typos and entities using vocabulary. - /// - /// Returns: - /// - `original`: Original input text - /// - `tokens`: Tokenized text (only included if `include_tokens=true`) - /// - `flagged`: List of flagged items (each with `match_type`): - /// - `synonym_match` - Business abbreviations (confidence=1.0) - /// - `exact_match` - Exact vocabulary matches (confidence=1.0) - /// - `typo_correction` - Spelling corrections (confidence less than 1.0) - /// - `processing_time_ms`: Processing time in milliseconds - /// - /// Text analysis request - /// Text analysis response - [HttpPost("fuzzy-sharp/analyze-text")] - [ProducesResponseType(typeof(List), StatusCodes.Status200OK)] - [ProducesResponseType(StatusCodes.Status400BadRequest)] - [ProducesResponseType(StatusCodes.Status500InternalServerError)] - public async Task AnalyzeText([FromBody] string text) - { - try - { - if (string.IsNullOrWhiteSpace(text)) - { - return BadRequest(new { error = "Text is required" }); - } - - var result = await _phraseService.SearchPhrasesAsync(text); - return Ok(result); - } - catch (Exception ex) - { - _logger.LogError(ex, "Error analyzing and searching entities"); - return StatusCode(500, new { error = $"Error analyzing and searching entities: {ex.Message}" }); - } - } -} diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs deleted file mode 100644 index 92bfad905..000000000 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs +++ /dev/null @@ -1,13 +0,0 @@ - 
-namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Arguments; - -public class TextAnalysisRequest -{ - public string Text { get; set; } = string.Empty; - public string? VocabularyFolderName { get; set; } - public string? SynonymMappingFile { get; set; } - public double Cutoff { get; set; } = 0.82; - public int TopK { get; set; } = 5; - public int MaxNgram { get; set; } = 5; - public bool IncludeTokens { get; set; } = false; -} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs index 1a125ea08..fc1a75326 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs @@ -1,9 +1,4 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp; -using BotSharp.Abstraction.Knowledges; using BotSharp.Abstraction.Plugins; -using BotSharp.Plugin.FuzzySharp.Services; -using BotSharp.Plugin.FuzzySharp.Services.Matching; -using BotSharp.Plugin.FuzzySharp.Services.Processors; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; @@ -18,10 +13,15 @@ public class FuzzySharpPlugin : IBotSharpPlugin public void RegisterDI(IServiceCollection services, IConfiguration config) { + var settings = new FuzzySharpSettings(); + config.Bind("FuzzySharp", settings); + services.AddSingleton(provider => settings); + services.AddScoped(); services.AddScoped(); - services.AddScoped(); - services.AddScoped(); + services.AddScoped(); + services.AddScoped(); + services.AddScoped(); services.AddScoped(); services.AddScoped(); diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/INgramProcessor.cs similarity index 74% rename from src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs rename to src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/INgramProcessor.cs index 90a9a06f1..917022bc7 100644 --- 
a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/INgramProcessor.cs @@ -1,6 +1,4 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models; - -namespace BotSharp.Plugin.FuzzySharp.FuzzSharp; +namespace BotSharp.Plugin.FuzzySharp.Interfaces; public interface INgramProcessor { @@ -15,11 +13,11 @@ public interface INgramProcessor /// Minimum confidence threshold for fuzzy matching /// Maximum number of matches to return /// List of flagged items - List ProcessNgrams( + List ProcessNgrams( List tokens, Dictionary> vocabulary, - Dictionary synonymMapping, - Dictionary Sources)> lookup, + Dictionary synonymMapping, + Dictionary Sources)> lookup, int maxNgram, double cutoff, int topK); diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/IResultProcessor.cs similarity index 73% rename from src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs rename to src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/IResultProcessor.cs index c900877bf..1a059ef7e 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/IResultProcessor.cs @@ -1,6 +1,4 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models; - -namespace BotSharp.Plugin.FuzzySharp.FuzzSharp; +namespace BotSharp.Plugin.FuzzySharp.Interfaces; /// /// Result processor interface @@ -13,5 +11,5 @@ public interface IResultProcessor /// /// List of flagged items to process /// Processed list of flagged items (deduplicated and sorted) - List ProcessResults(List flagged); + List ProcessResults(List flagged); } diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/ITokenMatcher.cs similarity index 76% rename from src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs rename to 
src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/ITokenMatcher.cs index c715a8255..9fb965fe0 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Interfaces/ITokenMatcher.cs @@ -1,4 +1,4 @@ -namespace BotSharp.Plugin.FuzzySharp.FuzzSharp; +namespace BotSharp.Plugin.FuzzySharp.Interfaces; public interface ITokenMatcher { @@ -12,7 +12,7 @@ public interface ITokenMatcher /// /// Priority of this matcher (higher priority matchers are tried first) /// - int Priority { get; } + MatchPriority Priority { get; } } /// @@ -24,8 +24,8 @@ public record MatchContext( int StartIndex, int NgramLength, Dictionary> Vocabulary, - Dictionary SynonymMapping, - Dictionary Sources)> Lookup, + Dictionary SynonymMapping, + Dictionary Sources)> Lookup, double Cutoff, int TopK); @@ -35,5 +35,5 @@ public record MatchContext( public record MatchResult( string CanonicalForm, List Sources, - string MatchType, + MatchPriority MatchType, double Confidence); diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Models/FlaggedTokenItem.cs similarity index 67% rename from src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs rename to src/Plugins/BotSharp.Plugin.FuzzySharp/Models/FlaggedTokenItem.cs index 67bbd2802..ef96779d8 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Models/FlaggedTokenItem.cs @@ -1,12 +1,11 @@ +namespace BotSharp.Plugin.FuzzySharp.Models; -namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Models; - -public class FlaggedItem +public class FlaggedTokenItem { public int Index { get; set; } public string Token { get; set; } = string.Empty; public List Sources { get; set; } = new(); - public string MatchType { get; set; } = string.Empty; + public MatchPriority MatchType { get; set; } = new(); public string CanonicalForm { get; set; } = 
string.Empty; public double Confidence { get; set; } public int NgramLength { get; set; } diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Models/MatchPriority.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Models/MatchPriority.cs new file mode 100644 index 000000000..673c6df19 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Models/MatchPriority.cs @@ -0,0 +1,24 @@ +namespace BotSharp.Plugin.FuzzySharp.Models; + +public class MatchPriority +{ + public int Order { get; set; } + public string Name { get; set; } = string.Empty; + + public MatchPriority() + { + + } + + public MatchPriority(int order, string name) + { + Order = order; + Name = name; + } + + public override string ToString() + { + return $"{Name} => {Order}"; + ; + } +} diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Models/TokenAnalysisResponse.cs similarity index 50% rename from src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs rename to src/Plugins/BotSharp.Plugin.FuzzySharp/Models/TokenAnalysisResponse.cs index 0a05d9cd1..c5d62daf5 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Models/TokenAnalysisResponse.cs @@ -1,10 +1,9 @@ +namespace BotSharp.Plugin.FuzzySharp.Models; -namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Models; - -public class TextAnalysisResponse +public class TokenAnalysisResponse { public string Original { get; set; } = string.Empty; public List? 
Tokens { get; set; } - public List Flagged { get; set; } = new(); + public List FlaggedItems { get; set; } = new(); public double ProcessingTimeMs { get; set; } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/DataLoaders/CsvTokenDataLoader.cs similarity index 52% rename from src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs rename to src/Plugins/BotSharp.Plugin.FuzzySharp/Services/DataLoaders/CsvTokenDataLoader.cs index af471bfee..beca5c6b3 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/DataLoaders/CsvTokenDataLoader.cs @@ -1,4 +1,4 @@ -using BotSharp.Abstraction.Knowledges; +using BotSharp.Abstraction.Utilities; using BotSharp.Core.Infrastructures; using CsvHelper; using CsvHelper.Configuration; @@ -6,31 +6,37 @@ using System.Globalization; using System.IO; -namespace BotSharp.Plugin.FuzzySharp.Services; +namespace BotSharp.Plugin.FuzzySharp.Services.DataLoaders; -public class CsvPhraseCollectionLoader : IPhraseCollection +public class CsvTokenDataLoader : ITokenDataLoader { - private readonly ILogger _logger; + private readonly ILogger _logger; + private readonly FuzzySharpSettings _settings; + private readonly string _basePath; - public CsvPhraseCollectionLoader(ILogger logger) + public CsvTokenDataLoader( + ILogger logger, + FuzzySharpSettings settings) { + _settings = settings; _logger = logger; + _basePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, settings.Data?.BaseDir ?? 
"data/tokens"); } + public string Provider => "fuzzy-sharp-csv"; + +#if !DEBUG [SharpCache(60)] +#endif public async Task>> LoadVocabularyAsync() { - string foldername = ""; var vocabulary = new Dictionary>(); - if (string.IsNullOrEmpty(foldername)) - { - return vocabulary; - } - // Load CSV files from the folder - var csvFileDict = await LoadCsvFilesFromFolderAsync(foldername); - if (csvFileDict.Count == 0) + var folderName = _settings.Data?.Vocabulary?.Folder ?? string.Empty; + var fileNames = _settings.Data?.Vocabulary?.FileNames?.Where(x => Path.GetFileName(x).EndsWith(".csv")); + var csvFileDict = GetCsvFilesMetaData(folderName, fileNames); + if (csvFileDict.IsNullOrEmpty()) { return vocabulary; } @@ -40,7 +46,7 @@ public async Task>> LoadVocabularyAsync() { try { - var terms = await LoadCsvFileAsync(filePath); + var terms = await LoadVocabularyFileAsync(filePath); vocabulary[source] = terms; _logger.LogInformation($"Loaded {terms.Count} terms for source '{source}' from {filePath}"); } @@ -53,74 +59,93 @@ public async Task>> LoadVocabularyAsync() return vocabulary; } +#if !DEBUG [SharpCache(60)] - public async Task> LoadSynonymMappingAsync() +#endif + public async Task> LoadSynonymMappingAsync() { - string filename = ""; - var result = new Dictionary(); - if (string.IsNullOrWhiteSpace(filename)) - { - return result; - } + var result = new Dictionary(); - var searchFolder = Path.Combine(AppContext.BaseDirectory, "data", "plugins", "fuzzySharp"); - var filePath = Path.Combine(searchFolder, filename); - - if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath)) + var folderName = _settings.Data?.Synonym?.Folder ?? 
string.Empty; + var fileNames = _settings.Data?.Synonym?.FileNames?.Where(x => Path.GetFileName(x).EndsWith(".csv")); + var csvFileDict = GetCsvFilesMetaData(folderName, fileNames); + if (csvFileDict.IsNullOrEmpty()) { return result; } - try + // Load each CSV file + foreach (var (source, filePath) in csvFileDict) { - using var reader = new StreamReader(filePath); - using var csv = new CsvReader(reader, CreateCsvConfig()); - - await csv.ReadAsync(); - csv.ReadHeader(); - - if (!HasRequiredColumns(csv)) + try { - _logger.LogWarning("Synonym mapping file missing required columns: {FilePath}", filePath); - return result; + var mapping = await LoadSynonymFileAsync(filePath); + foreach (var item in mapping) + { + result[item.Key] = item.Value; + } } - - while (await csv.ReadAsync()) + catch (Exception ex) { - var term = csv.GetField("term") ?? string.Empty; - var dbPath = csv.GetField("dbPath") ?? string.Empty; - var canonicalForm = csv.GetField("canonical_form") ?? string.Empty; + _logger.LogError(ex, $"Error loading CSV file for source '{source}': {filePath}"); + } + } - if (term.Length == 0 || dbPath.Length == 0 || canonicalForm.Length == 0) - { - _logger.LogWarning( - "Missing column(s) in CSV at row {Row}: term={Term}, dbPath={DbPath}, canonical_form={CanonicalForm}", - csv.Parser.RawRow, - term ?? "", - dbPath ?? "", - canonicalForm ?? ""); - continue; - } + return result; + } - var key = term.ToLowerInvariant(); - result[key] = (dbPath, canonicalForm); - } - _logger.LogInformation("Loaded synonym mapping from {FilePath}: {Count} terms", filePath, result.Count); + #region Private methods + /// + /// Load [csv file name] => file path + /// + /// + /// + private Dictionary GetCsvFilesMetaData(string folderName, IEnumerable? 
fileNames = null) + { + var csvFileDict = new Dictionary(); + if (string.IsNullOrWhiteSpace(folderName)) + { + return csvFileDict; } - catch (Exception ex) + + var searchFolder = Path.Combine(_basePath, folderName); + if (!Directory.Exists(searchFolder)) { - _logger.LogError(ex, "Error loading synonym mapping file: {FilePath}", filePath); + _logger.LogWarning($"Folder does not exist: {searchFolder}"); + return csvFileDict; } - return result; + IEnumerable csvFiles = new List(); + if (!fileNames.IsNullOrEmpty()) + { + csvFiles = fileNames!.Select(x => Path.Combine(searchFolder, x)); + } + else + { + csvFiles = Directory.GetFiles(searchFolder, "*.csv"); + } + + foreach (var file in csvFiles) + { + var fileName = Path.GetFileNameWithoutExtension(file); + csvFileDict[fileName] = file; + } + + _logger.LogInformation($"Loaded {csvFileDict.Count} CSV files from {searchFolder}"); + return csvFileDict; } - private async Task> LoadCsvFileAsync(string filePath) + /// + /// Load the first column in the vocabulary file + /// + /// + /// + private async Task> LoadVocabularyFileAsync(string filePath) { var terms = new HashSet(StringComparer.OrdinalIgnoreCase); - if (!File.Exists(filePath)) + if (string.IsNullOrWhiteSpace(filePath) || !File.Exists(filePath)) { _logger.LogWarning($"CSV file does not exist: {filePath}"); return terms; @@ -146,27 +171,63 @@ private async Task> LoadCsvFileAsync(string filePath) return terms; } - private async Task> LoadCsvFilesFromFolderAsync(string folderName) + + private async Task> LoadSynonymFileAsync(string filePath) { - var csvFileDict = new Dictionary(); - var searchFolder = Path.Combine(AppContext.BaseDirectory, "data", "plugins", "fuzzySharp", folderName); - if (!Directory.Exists(searchFolder)) + var result = new Dictionary(); + + if (string.IsNullOrWhiteSpace(filePath) || !File.Exists(filePath)) { - _logger.LogWarning($"Folder does not exist: {searchFolder}"); - return csvFileDict; + _logger.LogWarning($"CSV file does not exist: 
{filePath}"); + return result; } - var csvFiles = Directory.GetFiles(searchFolder, "*.csv"); - foreach (var file in csvFiles) + try { - var fileName = Path.GetFileNameWithoutExtension(file); - csvFileDict[fileName] = file; - } + using var reader = new StreamReader(filePath); + using var csv = new CsvReader(reader, CreateCsvConfig()); - _logger.LogInformation($"Loaded {csvFileDict.Count} CSV files from {searchFolder}"); - return await Task.FromResult(csvFileDict); + await csv.ReadAsync(); + csv.ReadHeader(); + + if (!HasRequiredColumns(csv)) + { + _logger.LogWarning("Synonym mapping file missing required columns: {FilePath}", filePath); + return result; + } + + while (await csv.ReadAsync()) + { + var term = csv.GetField("term") ?? string.Empty; + var dataSource = csv.GetField("dbPath") ?? string.Empty; + var canonicalForm = csv.GetField("canonical_form") ?? string.Empty; + + if (term.Length == 0 || dataSource.Length == 0 || canonicalForm.Length == 0) + { + _logger.LogWarning( + "Missing column(s) in CSV at row {Row}: term={Term}, dataSource={dataSource}, canonical_form={CanonicalForm}", + csv.Parser.RawRow, + term ?? "", + dataSource ?? "", + canonicalForm ?? 
""); + continue; + } + + var key = term.ToLowerInvariant(); + result[key] = (dataSource, canonicalForm); + } + + _logger.LogInformation("Loaded synonym mapping from {FilePath}: {Count} terms", filePath, result.Count); + return result; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error loading synonym mapping file: {FilePath}", filePath); + return result; + } } + private static CsvConfiguration CreateCsvConfig() { return new CsvConfiguration(CultureInfo.InvariantCulture) @@ -184,4 +245,5 @@ private static bool HasRequiredColumns(CsvReader csv) && headers.Contains("dbPath") && headers.Contains("canonical_form"); } + #endregion } diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/PhraseService.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/FuzzySharpTokenizer.cs similarity index 55% rename from src/Plugins/BotSharp.Plugin.FuzzySharp/Services/PhraseService.cs rename to src/Plugins/BotSharp.Plugin.FuzzySharp/Services/FuzzySharpTokenizer.cs index cd05ca6a6..863cda1b0 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/PhraseService.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/FuzzySharpTokenizer.cs @@ -1,97 +1,95 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp; -using BotSharp.Plugin.FuzzySharp.FuzzSharp.Arguments; -using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models; -using BotSharp.Abstraction.Knowledges; -using BotSharp.Abstraction.Knowledges.Models; -using BotSharp.Plugin.FuzzySharp.Utils; using Microsoft.Extensions.Logging; using System.Diagnostics; namespace BotSharp.Plugin.FuzzySharp.Services; -public class PhraseService : IPhraseService +public class FuzzySharpTokenizer : ITokenizer { - private readonly ILogger _logger; - private readonly IEnumerable _phraseLoaderServices; + private readonly ILogger _logger; + private readonly IEnumerable _tokenDataLoaders; private readonly INgramProcessor _ngramProcessor; private readonly IResultProcessor _resultProcessor; - public PhraseService( - ILogger logger, - IEnumerable 
phraseLoaderServices, + public FuzzySharpTokenizer( + ILogger logger, + IEnumerable tokenDataLoaders, INgramProcessor ngramProcessor, IResultProcessor resultProcessor) { _logger = logger; - _phraseLoaderServices = phraseLoaderServices; + _tokenDataLoaders = tokenDataLoaders; _ngramProcessor = ngramProcessor; _resultProcessor = resultProcessor; } - public Task> SearchPhrasesAsync(string term) + public string Provider => "fuzzy-sharp"; + + public async Task TokenizeAsync(string text, TokenizeOptions? options = null) { - var request = BuildTextAnalysisRequest(term); - var response = AnalyzeTextAsync(request); - return response.ContinueWith(t => + var response = new TokenizeResponse(); + + try { - var results = t.Result.Flagged.Select(f => new SearchPhrasesResult - { - Token = f.Token, - Sources = f.Sources, - CanonicalForm = f.CanonicalForm, - MatchType = f.MatchType, - Confidence = f.Confidence - }).ToList(); - return results; - }); - } + var result = await AnalyzeTextAsync(text, options); - private TextAnalysisRequest BuildTextAnalysisRequest(string inputText) - { - return new TextAnalysisRequest + return new TokenizeResponse + { + Success = true, + Results = result?.FlaggedItems?.Select(f => new TokenizeResult + { + Token = f.Token, + CanonicalText = f.CanonicalForm, + Data = new Dictionary + { + ["sources"] = f.Sources, + ["match_type"] = f.MatchType.Name, + ["confidence"] = f.Confidence + } + })?.ToList() ?? [] + }; + } + catch (Exception ex) { - Text = inputText - }; + _logger.LogError(ex, $"Error when tokenize in {Provider}: {text}."); + response.ErrorMsg = ex.Message; + return response; + } } /// /// Analyze text for typos and entities using domain-specific vocabulary /// - private async Task AnalyzeTextAsync(TextAnalysisRequest request) + private async Task AnalyzeTextAsync(string text, TokenizeOptions? 
options = null) { var stopwatch = Stopwatch.StartNew(); try { // Tokenize the text - var tokens = TextTokenizer.Tokenize(request.Text); + var tokens = TokenHelper.Tokenize(text); // Load vocabulary - var vocabulary = await LoadAllVocabularyAsync(); + var vocabulary = await LoadAllVocabularyAsync(options?.DataProviders); // Load synonym mapping - var synonymMapping = await LoadAllSynonymMappingAsync(); + var synonymMapping = await LoadAllSynonymMappingAsync(options?.DataProviders); // Analyze text - var flagged = AnalyzeTokens(tokens, vocabulary, synonymMapping, request); + var flaggedItems = AnalyzeTokens(tokens, vocabulary, synonymMapping, options); stopwatch.Stop(); - var response = new TextAnalysisResponse + var response = new TokenAnalysisResponse { - Original = request.Text, - Flagged = flagged, + Original = text, + Tokens = tokens, + FlaggedItems = flaggedItems, ProcessingTimeMs = Math.Round(stopwatch.Elapsed.TotalMilliseconds, 2) }; - if (request.IncludeTokens) - { - response.Tokens = tokens; - } - _logger.LogInformation( $"Text analysis completed in {response.ProcessingTimeMs}ms | " + - $"Text length: {request.Text.Length} chars | " + - $"Flagged items: {flagged.Count}"); + $"Text length: {text.Length} chars | " + + $"Flagged items: {flaggedItems.Count}"); return response; } @@ -102,9 +100,10 @@ private async Task AnalyzeTextAsync(TextAnalysisRequest re } } - public async Task>> LoadAllVocabularyAsync() + public async Task>> LoadAllVocabularyAsync(IEnumerable? 
dataProviders = null) { - var results = await Task.WhenAll(_phraseLoaderServices.Select(c => c.LoadVocabularyAsync())); + var dataLoaders = _tokenDataLoaders.Where(x => dataProviders == null || dataProviders.Contains(x.Provider)); + var results = await Task.WhenAll(dataLoaders.Select(c => c.LoadVocabularyAsync())); var merged = new Dictionary>(); foreach (var dict in results) @@ -112,24 +111,31 @@ public async Task>> LoadAllVocabularyAsync() foreach (var kvp in dict) { if (!merged.TryGetValue(kvp.Key, out var set)) + { merged[kvp.Key] = new HashSet(kvp.Value); + } else + { set.UnionWith(kvp.Value); + } } } return merged; } - public async Task> LoadAllSynonymMappingAsync() + public async Task> LoadAllSynonymMappingAsync(IEnumerable? dataProviders = null) { - var results = await Task.WhenAll(_phraseLoaderServices.Select(c => c.LoadSynonymMappingAsync())); + var dataLoaders = _tokenDataLoaders.Where(x => dataProviders == null || dataProviders.Contains(x.Provider)); + var results = await Task.WhenAll(dataLoaders.Select(c => c.LoadSynonymMappingAsync())); var merged = new Dictionary(); foreach (var dict in results) { foreach (var kvp in dict) + { merged[kvp.Key] = kvp.Value; // later entries override earlier ones + } } return merged; @@ -138,11 +144,11 @@ public async Task>> LoadAllVocabularyAsync() /// /// Analyze tokens for typos and entities /// - private List AnalyzeTokens( + private List AnalyzeTokens( List tokens, Dictionary> vocabulary, - Dictionary synonymMapping, - TextAnalysisRequest request) + Dictionary synonymMapping, + TokenizeOptions? options) { // Build lookup table for O(1) exact match lookups (matching Python's build_lookup) var lookup = BuildLookup(vocabulary); @@ -153,9 +159,9 @@ private List AnalyzeTokens( vocabulary, synonymMapping, lookup, - request.MaxNgram, - request.Cutoff, - request.TopK); + options?.MaxNgram ?? 5, + options?.Cutoff ?? 0.82, + options?.TopK ?? 
5); // Process results: deduplicate and sort return _resultProcessor.ProcessResults(flagged); @@ -168,10 +174,10 @@ private List AnalyzeTokens( /// /// Matches Python's build_lookup() function. /// - private Dictionary Sources)> BuildLookup( + private Dictionary Sources)> BuildLookup( Dictionary> vocabulary) { - var lookup = new Dictionary Sources)>(); + var lookup = new Dictionary Sources)>(); foreach (var (source, terms) in vocabulary) { @@ -189,7 +195,7 @@ private List AnalyzeTokens( else { // New term - create entry with single source in list - lookup[key] = (term, new List { source }); + lookup[key] = (term, new HashSet { source }); } } } diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs index 38e562eff..e8ab79797 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs @@ -1,11 +1,8 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp; -using BotSharp.Plugin.FuzzySharp.Constants; - namespace BotSharp.Plugin.FuzzySharp.Services.Matching; public class ExactMatcher : ITokenMatcher { - public int Priority => 2; // Second highest priority + public MatchPriority Priority => MatchReason.ExactMatch; // Second highest priority public MatchResult? 
TryMatch(MatchContext context) { @@ -13,8 +10,8 @@ public class ExactMatcher : ITokenMatcher { return new MatchResult( CanonicalForm: match.CanonicalForm, - Sources: match.Sources, - MatchType: MatchReason.ExactMatch, + Sources: match.Sources.ToList(), + MatchType: Priority, Confidence: 1.0); } diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs index 193e28bc6..6b6d4be17 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs @@ -1,19 +1,17 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp; using System.Text.RegularExpressions; using FuzzySharp; using FuzzySharp.SimilarityRatio; using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; -using BotSharp.Plugin.FuzzySharp.Constants; namespace BotSharp.Plugin.FuzzySharp.Services.Matching; public class FuzzyMatcher : ITokenMatcher { - public int Priority => 1; // Lowest priority + public MatchPriority Priority => MatchReason.FuzzyMatch; // Lowest priority public MatchResult? TryMatch(MatchContext context) { - var match = CheckTypoCorrection(context.ContentSpan, context.Lookup, context.Cutoff); + var match = FuzzyMatch(context.ContentSpan, context.Lookup, context.Cutoff); if (match == null) { return null; @@ -23,16 +21,16 @@ public class FuzzyMatcher : ITokenMatcher return new MatchResult( CanonicalForm: canonicalForm, Sources: sources, - MatchType: MatchReason.TypoCorrection, + MatchType: Priority, Confidence: confidence); } /// /// Check typo correction using fuzzy matching /// - private (string CanonicalForm, List Sources, double Confidence)? CheckTypoCorrection( + private (string CanonicalForm, List Sources, double Confidence)? 
FuzzyMatch( string contentSpan, - Dictionary Sources)> lookup, + Dictionary Sources)> lookup, double cutoff) { // Convert cutoff to 0-100 scale for FuzzySharp @@ -46,19 +44,18 @@ public class FuzzyMatcher : ITokenMatcher var result = Process.ExtractOne( contentSpan, candidates, - candidate => Normalize(candidate), // Preprocessor function + candidate => Normalize(candidate), // Preprocessor function scorer, - scoreCutoff // Score cutoff + scoreCutoff // Score cutoff ); - if (result == null) + // Get the canonical form and sources from lookup + if (result == null || !lookup.TryGetValue(result.Value, out var match)) { return null; } - // Get the canonical form and sources from lookup - var match = lookup[result.Value]; - return (match.CanonicalForm, match.Sources, Math.Round(result.Score / 100.0, 3)); + return (match.CanonicalForm, match.Sources.ToList(), Math.Round(result.Score / 100.0, 3)); } /// diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/SynonymMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/SynonymMatcher.cs index 9f6d8f97d..715df7e75 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/SynonymMatcher.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/SynonymMatcher.cs @@ -1,11 +1,8 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp; -using BotSharp.Plugin.FuzzySharp.Constants; - namespace BotSharp.Plugin.FuzzySharp.Services.Matching; public class SynonymMatcher : ITokenMatcher { - public int Priority => 3; // Highest priority + public MatchPriority Priority => MatchReason.SynonymMatch; // Highest priority public MatchResult? 
TryMatch(MatchContext context) { @@ -13,8 +10,8 @@ public class SynonymMatcher : ITokenMatcher { return new MatchResult( CanonicalForm: match.CanonicalForm, - Sources: new List { match.DbPath }, - MatchType: MatchReason.SynonymMatch, + Sources: new List { match.DataSource }, + MatchType: Priority, Confidence: 1.0); } diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs index 86e584067..2caf48b7d 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs @@ -1,6 +1,3 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp; -using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models; - namespace BotSharp.Plugin.FuzzySharp.Services.Processors; public class NgramProcessor : INgramProcessor @@ -10,19 +7,19 @@ public class NgramProcessor : INgramProcessor public NgramProcessor(IEnumerable matchers) { // Sort matchers by priority (highest first) - _matchers = matchers.OrderByDescending(m => m.Priority).ToList(); + _matchers = matchers.OrderByDescending(m => m.Priority.Order).ToList(); } - public List ProcessNgrams( + public List ProcessNgrams( List tokens, Dictionary> vocabulary, - Dictionary synonymMapping, - Dictionary Sources)> lookup, + Dictionary synonymMapping, + Dictionary Sources)> lookup, int maxNgram, double cutoff, int topK) { - var flagged = new List(); + var flagged = new List(); // Process n-grams from largest to smallest for (int n = maxNgram; n >= 1; n--) @@ -52,13 +49,13 @@ public List ProcessNgrams( /// /// Process a single n-gram at the specified position /// - private FlaggedItem? ProcessSingleNgram( + private FlaggedTokenItem? 
ProcessSingleNgram( List tokens, int startIdx, int n, Dictionary> vocabulary, - Dictionary synonymMapping, - Dictionary Sources)> lookup, + Dictionary synonymMapping, + Dictionary Sources)> lookup, double cutoff, int topK) { @@ -73,14 +70,14 @@ public List ProcessNgrams( // Try matching in priority order using matchers var context = new MatchContext( - contentSpan, - contentLow, - startIdx, + contentSpan, + contentLow, + startIdx, n, vocabulary, - synonymMapping, + synonymMapping, lookup, - cutoff, + cutoff, topK); foreach (var matcher in _matchers) @@ -98,13 +95,13 @@ public List ProcessNgrams( /// /// Create a FlaggedItem from a MatchResult /// - private FlaggedItem CreateFlaggedItem( + private FlaggedTokenItem CreateFlaggedItem( MatchResult matchResult, int startIndex, string contentSpan, int ngramLength) { - return new FlaggedItem + return new FlaggedTokenItem { Index = startIndex, Token = contentSpan, diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs index ea402804d..fd0bdb297 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs @@ -1,21 +1,17 @@ -using BotSharp.Plugin.FuzzySharp.FuzzSharp; -using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models; -using BotSharp.Plugin.FuzzySharp.Constants; - namespace BotSharp.Plugin.FuzzySharp.Services.Processors; public class ResultProcessor : IResultProcessor { - public List ProcessResults(List flagged) + public List ProcessResults(List flagged) { // Remove overlapping duplicates - var deduped = RemoveOverlappingDuplicates(flagged); + var items = RemoveOverlappingDuplicates(flagged); // Sort by confidence (descending), then match_type (alphabetically) // This matches Python's _sort_and_format_results function - return deduped + return items .OrderByDescending(f => f.Confidence) - .ThenBy(f 
=> f.MatchType) + .ThenBy(f => f.MatchType.Order) .ToList(); } @@ -27,9 +23,9 @@ public List ProcessResults(List flagged) /// 2. Highest confidence /// 3. Shortest n-gram length /// - private List RemoveOverlappingDuplicates(List flagged) + private List RemoveOverlappingDuplicates(List flagged) { - var deduped = new List(); + var deduped = new List(); var skipIndices = new HashSet(); for (int i = 0; i < flagged.Count; i++) @@ -43,7 +39,7 @@ private List RemoveOverlappingDuplicates(List flagged) var itemRange = (item.Index, item.Index + item.NgramLength); // Find all overlapping items with same canonical_form (regardless of match_type) - var overlappingGroup = new List { item }; + var overlappingGroup = new List { item }; for (int j = i + 1; j < flagged.Count; j++) { if (skipIndices.Contains(j)) @@ -67,31 +63,20 @@ private List RemoveOverlappingDuplicates(List flagged) // Priority: synonym_match (3) > exact_match (2) > typo_correction (1) // Then highest confidence, then shortest ngram var bestItem = overlappingGroup - .OrderByDescending(x => GetMatchTypePriority(x.MatchType)) + .OrderByDescending(x => x.MatchType.Order) .ThenByDescending(x => x.Confidence) .ThenBy(x => x.NgramLength) - .First(); - deduped.Add(bestItem); + .FirstOrDefault(); + + if (bestItem != null) + { + deduped.Add(bestItem); + } } return deduped; } - /// - /// Get priority value for match type (higher is better) - /// Matches the priority order in matchers: synonym > exact > fuzzy - /// - private int GetMatchTypePriority(string matchType) - { - return matchType switch - { - MatchReason.SynonymMatch => 3, // Highest priority - MatchReason.ExactMatch => 2, // Second priority - MatchReason.TypoCorrection => 1, // Lowest priority - _ => 0 // Unknown types get lowest priority - }; - } - /// /// Check if two token ranges overlap. 
/// diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Settings/FuzzySharpSettings.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Settings/FuzzySharpSettings.cs new file mode 100644 index 000000000..2b75fabef --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Settings/FuzzySharpSettings.cs @@ -0,0 +1,20 @@ +namespace BotSharp.Plugin.FuzzySharp.Settings; + +public class FuzzySharpSettings +{ + public TokenDataSettings Data { get; set; } +} + +public class TokenDataSettings +{ + public string? BaseDir { get; set; } = "data/tokens"; + + public TokenFileSetting Vocabulary { get; set; } + public TokenFileSetting Synonym { get; set; } +} + +public class TokenFileSetting +{ + public string Folder { get; set; } + public string[] FileNames { get; set; } +} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs index 1a0fe1eab..9a6ec03d5 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs @@ -3,3 +3,17 @@ global using System.Linq; global using System.Text; global using System.Threading.Tasks; + +global using BotSharp.Abstraction.Tokenizers; +global using BotSharp.Abstraction.Tokenizers.Models; +global using BotSharp.Abstraction.Tokenizers.Responses; + +global using BotSharp.Plugin.FuzzySharp.Models; +global using BotSharp.Plugin.FuzzySharp.Utils; +global using BotSharp.Plugin.FuzzySharp.Interfaces; +global using BotSharp.Plugin.FuzzySharp.Services; +global using BotSharp.Plugin.FuzzySharp.Services.DataLoaders; +global using BotSharp.Plugin.FuzzySharp.Services.Matching; +global using BotSharp.Plugin.FuzzySharp.Services.Processors; +global using BotSharp.Plugin.FuzzySharp.Settings; +global using BotSharp.Plugin.FuzzySharp.Constants; \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TokenHelper.cs similarity index 87% rename from 
src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs rename to src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TokenHelper.cs index 8853733a2..4d73e2e23 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TokenHelper.cs @@ -1,17 +1,27 @@ -using BotSharp.Plugin.FuzzySharp.Constants; - namespace BotSharp.Plugin.FuzzySharp.Utils; -public static class TextTokenizer +public static class TokenHelper { + /// + /// Complete tokenization flow: preprocessing + tokenization + /// This is the recommended usage + /// + /// Text to tokenize + /// List of tokens + public static List Tokenize(string text) + { + var preprocessed = PreprocessText(text); + return SimpleTokenize(preprocessed); + } + /// /// Preprocess text: add spaces before and after characters that need to be separated /// This allows subsequent simple whitespace tokenization to correctly separate these characters - /// Example: "(IH)" -> " ( IH ) " -> ["(", "IH", ")"] + /// Example: "(AB)" -> " ( AB ) " -> ["(", "AB", ")"] /// /// Text to preprocess /// Preprocessed text - public static string PreprocessText(string text) + private static string PreprocessText(string text) { if (string.IsNullOrWhiteSpace(text)) { @@ -44,20 +54,8 @@ public static string PreprocessText(string text) /// /// Text to tokenize /// List of tokens - public static List SimpleTokenize(string text) + private static List SimpleTokenize(string text) { return text.Split(TextConstants.TokenSeparators, StringSplitOptions.RemoveEmptyEntries).ToList(); } - - /// - /// Complete tokenization flow: preprocessing + tokenization - /// This is the recommended usage - /// - /// Text to tokenize - /// List of tokens - public static List Tokenize(string text) - { - var preprocessed = PreprocessText(text); - return SimpleTokenize(preprocessed); - } } diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/ChatCompletionProvider.cs 
b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/ChatCompletionProvider.cs index d224fb122..54089050d 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/ChatCompletionProvider.cs @@ -1,10 +1,16 @@ +using Azure; using BotSharp.Abstraction.Files; using BotSharp.Abstraction.Files.Models; using BotSharp.Abstraction.Files.Utilities; using BotSharp.Abstraction.Hooks; +using BotSharp.Abstraction.MessageHub.Models; +using BotSharp.Core.Infrastructures.Streams; +using BotSharp.Core.MessageHub; +using Fluid; using GenerativeAI; using GenerativeAI.Core; using GenerativeAI.Types; +using Microsoft.Extensions.AI; namespace BotSharp.Plugin.GoogleAi.Providers.Chat; @@ -160,9 +166,140 @@ public async Task GetChatCompletionsAsync(Agent agent, List GetChatCompletionsStreamingAsync(Agent agent, List conversations) + public async Task GetChatCompletionsStreamingAsync(Agent agent, List conversations) { - throw new NotImplementedException(); + var client = ProviderHelper.GetGeminiClient(Provider, _model, _services); + var chatClient = client.CreateGenerativeModel(_model.ToModelId()); + var (prompt, request) = PrepareOptions(chatClient, agent, conversations); + + var hub = _services.GetRequiredService>>(); + var conv = _services.GetRequiredService(); + var messageId = conversations.LastOrDefault()?.MessageId ?? string.Empty; + + var contentHooks = _services.GetHooks(agent.Id); + // Before chat completion hook + foreach (var hook in contentHooks) + { + await hook.BeforeGenerating(agent, conversations); + } + + hub.Push(new() + { + EventName = ChatEvent.BeforeReceiveLlmStreamMessage, + RefId = conv.ConversationId, + Data = new RoleDialogModel(AgentRole.Assistant, string.Empty) + { + CurrentAgentId = agent.Id, + MessageId = messageId + } + }); + + using var textStream = new RealtimeTextStream(); + UsageMetadata? 
tokenUsage = null; + + var responseMessage = new RoleDialogModel(AgentRole.Assistant, string.Empty) + { + CurrentAgentId = agent.Id, + MessageId = messageId + }; + + await foreach (var response in chatClient.StreamContentAsync(request)) + { + var candidate = response?.Candidates?.FirstOrDefault(); + if (candidate == null) + { + continue; + } + + var part = candidate?.Content?.Parts?.FirstOrDefault(); + if (!string.IsNullOrEmpty(part?.Text)) + { + var text = part.Text; + textStream.Collect(text); + + var content = new RoleDialogModel(AgentRole.Assistant, text) + { + CurrentAgentId = agent.Id, + MessageId = messageId + }; + hub.Push(new() + { + EventName = ChatEvent.OnReceiveLlmStreamMessage, + RefId = conv.ConversationId, + Data = content + }); + } + + if (candidate.FinishReason == FinishReason.STOP) + { + if (part?.FunctionCall != null) + { + var functionCall = part.FunctionCall; + responseMessage = new RoleDialogModel(AgentRole.Function, string.Empty) + { + CurrentAgentId = agent.Id, + MessageId = messageId, + ToolCallId = functionCall.Id, + FunctionName = functionCall.Name, + FunctionArgs = functionCall.Args?.ToString() ?? string.Empty + }; + +#if DEBUG + _logger.LogDebug($"Tool Call (id: {functionCall.Id}) => {functionCall.Name}({functionCall.Args})"); +#endif + } + else + { + var allText = textStream.GetText(); +#if DEBUG + _logger.LogDebug($"Stream text Content: {allText}"); +#endif + + responseMessage = new RoleDialogModel(AgentRole.Assistant, allText) + { + CurrentAgentId = agent.Id, + MessageId = messageId, + IsStreaming = true + }; + } + + tokenUsage = response?.UsageMetadata; + } + else if (candidate.FinishReason.HasValue) + { + var text = candidate.FinishMessage ?? 
candidate.FinishReason.Value.ToString(); + responseMessage = new RoleDialogModel(AgentRole.Assistant, text) + { + CurrentAgentId = agent.Id, + MessageId = messageId, + IsStreaming = true + }; + + tokenUsage = response?.UsageMetadata; + } + } + + hub.Push(new() + { + EventName = ChatEvent.AfterReceiveLlmStreamMessage, + RefId = conv.ConversationId, + Data = responseMessage + }); + + // After chat completion hook + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(responseMessage, new TokenStatsModel + { + Prompt = prompt, + Provider = Provider, + Model = _model, + TextInputTokens = tokenUsage?.PromptTokenCount ?? 0, + TextOutputTokens = tokenUsage?.CandidatesTokenCount ?? 0 + }); + } + + return responseMessage; } public void SetModelName(string model) diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/KnowledgeBasePlugin.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/KnowledgeBasePlugin.cs index 2bbc950a1..c50764594 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/KnowledgeBasePlugin.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/KnowledgeBasePlugin.cs @@ -1,9 +1,6 @@ +using BotSharp.Abstraction.Knowledges.Processors; using BotSharp.Abstraction.Plugins.Models; using BotSharp.Abstraction.Settings; -using BotSharp.Abstraction.Users.Enums; -using BotSharp.Plugin.KnowledgeBase.Converters; -using BotSharp.Plugin.KnowledgeBase.Hooks; -using BotSharp.Plugin.KnowledgeBase.Services; using Microsoft.Extensions.Configuration; namespace BotSharp.Plugin.KnowledgeBase; @@ -27,7 +24,7 @@ public void RegisterDI(IServiceCollection services, IConfiguration config) services.AddScoped(); services.AddScoped(); services.AddScoped(); - + services.AddScoped(); } public bool AttachMenu(List menu) @@ -40,7 +37,8 @@ public bool AttachMenu(List menu) { new PluginMenuDef("Q & A", link: "page/knowledge-base/question-answer"), new PluginMenuDef("Relationships", link: "page/knowledge-base/relationships"), - new PluginMenuDef("Documents", link: 
"page/knowledge-base/documents") + new PluginMenuDef("Documents", link: "page/knowledge-base/documents"), + new PluginMenuDef("Dictionary", link: "page/knowledge-base/dictionary") } }); return true; diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Processors/TextFileKnowledgeProcessor.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Processors/TextFileKnowledgeProcessor.cs new file mode 100644 index 000000000..9f91a9734 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Processors/TextFileKnowledgeProcessor.cs @@ -0,0 +1,50 @@ +using BotSharp.Abstraction.Files.Models; +using BotSharp.Abstraction.Knowledges.Helpers; +using BotSharp.Abstraction.Knowledges.Options; +using BotSharp.Abstraction.Knowledges.Processors; +using BotSharp.Abstraction.Knowledges.Responses; +using System.Net.Mime; + +namespace BotSharp.Plugin.KnowledgeBase.Processors; + +public class TextFileKnowledgeProcessor : IKnowledgeProcessor +{ + private readonly IServiceProvider _services; + private readonly ILogger _logger; + + public TextFileKnowledgeProcessor( + IServiceProvider services, + ILogger logger) + { + _services = services; + _logger = logger; + } + + public string Provider => "botsharp-txt-knowledge"; + + public async Task GetFileKnowledgeAsync(FileBinaryDataModel file, FileKnowledgeHandleOptions? 
options = null) + { + if (!file.ContentType.IsEqualTo(MediaTypeNames.Text.Plain)) + { + return new(); + } + + var binary = file.FileBinaryData; + using var stream = binary.ToStream(); + stream.Position = 0; + using var reader = new StreamReader(stream); + var content = await reader.ReadToEndAsync(); + reader.Close(); + stream.Close(); + + var lines = TextChopper.Chop(content, ChunkOption.Default()); + return new FileKnowledgeResponse + { + Success = true, + Knowledges = new List + { + new() { Contents = lines } + } + }; + } +} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index 24fc5546b..e73add1f9 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -1,12 +1,3 @@ -using BotSharp.Abstraction.Files; -using BotSharp.Abstraction.Files.Models; -using BotSharp.Abstraction.Files.Proccessors; -using BotSharp.Abstraction.Files.Utilities; -using BotSharp.Abstraction.Knowledges.Filters; -using BotSharp.Abstraction.Knowledges.Helpers; -using BotSharp.Abstraction.Knowledges.Options; -using BotSharp.Abstraction.Knowledges.Responses; -using BotSharp.Abstraction.VectorStorage.Enums; using System.Net.Http; namespace BotSharp.Plugin.KnowledgeBase.Services; @@ -369,27 +360,14 @@ public async Task GetKnowledgeDocumentBinaryData(string col #region Read doc content private async Task> GetFileKnowledge(FileBinaryDataModel file, KnowledgeDocOptions? options) { - var processor = _services.GetServices().FirstOrDefault(x => x.Provider.IsEqualTo(options?.Processor)); + var processor = _services.GetServices().FirstOrDefault(x => x.Provider.IsEqualTo(options?.Processor)); if (processor == null) { return Enumerable.Empty(); } var response = await processor.GetFileKnowledgeAsync(file, options: options); - return response?.Knowledges ?? 
[]; - } - - private async Task> ReadTxt(BinaryData binary, ChunkOption option) - { - using var stream = binary.ToStream(); - stream.Position = 0; - using var reader = new StreamReader(stream); - var content = await reader.ReadToEndAsync(); - reader.Close(); - stream.Close(); - - var lines = TextChopper.Chop(content, option); - return lines; + return response?.Success == true ? response.Knowledges ?? [] : []; } #endregion diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs index d0817c802..61f5e686c 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs @@ -1,5 +1,3 @@ -using BotSharp.Abstraction.Files; -using BotSharp.Abstraction.VectorStorage.Enums; using BotSharp.Abstraction.VectorStorage.Filters; using BotSharp.Abstraction.VectorStorage.Options; diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Using.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Using.cs index 53ed55a2c..b2e8cb01e 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Using.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Using.cs @@ -14,14 +14,13 @@ global using BotSharp.Abstraction.Users; global using BotSharp.Abstraction.Utilities; global using BotSharp.Abstraction.Conversations.Models; -global using BotSharp.Abstraction.Agents.Settings; +global using BotSharp.Abstraction.Users.Enums; global using BotSharp.Abstraction.Graph; global using BotSharp.Abstraction.Knowledges.Settings; global using BotSharp.Abstraction.Knowledges.Enums; global using BotSharp.Abstraction.VectorStorage; global using BotSharp.Abstraction.VectorStorage.Models; global using BotSharp.Abstraction.Graph.Models; -global using BotSharp.Abstraction.Knowledges.Models; global using BotSharp.Abstraction.MLTasks; global using BotSharp.Abstraction.Functions; global using 
BotSharp.Abstraction.Messaging.Enums; @@ -32,5 +31,20 @@ global using BotSharp.Abstraction.Agents.Models; global using BotSharp.Abstraction.Functions.Models; global using BotSharp.Abstraction.Repositories; + +global using BotSharp.Abstraction.Files; +global using BotSharp.Abstraction.Files.Models; +global using BotSharp.Abstraction.Files.Utilities; +global using BotSharp.Abstraction.Knowledges.Models; +global using BotSharp.Abstraction.Knowledges.Filters; +global using BotSharp.Abstraction.Knowledges.Options; +global using BotSharp.Abstraction.Knowledges.Processors; +global using BotSharp.Abstraction.Knowledges.Responses; +global using BotSharp.Abstraction.VectorStorage.Enums; + global using BotSharp.Plugin.KnowledgeBase.Enum; -global using BotSharp.Plugin.KnowledgeBase.Helpers; \ No newline at end of file +global using BotSharp.Plugin.KnowledgeBase.Helpers; +global using BotSharp.Plugin.KnowledgeBase.Converters; +global using BotSharp.Plugin.KnowledgeBase.Hooks; +global using BotSharp.Plugin.KnowledgeBase.Processors; +global using BotSharp.Plugin.KnowledgeBase.Services; \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.MMPEmbedding/Providers/MMPEmbeddingProvider.cs b/src/Plugins/BotSharp.Plugin.MMPEmbedding/Providers/MMPEmbeddingProvider.cs index 9d1054096..3bc118381 100644 --- a/src/Plugins/BotSharp.Plugin.MMPEmbedding/Providers/MMPEmbeddingProvider.cs +++ b/src/Plugins/BotSharp.Plugin.MMPEmbedding/Providers/MMPEmbeddingProvider.cs @@ -1,8 +1,4 @@ -using System.Collections.Generic; using System.Text.RegularExpressions; -using BotSharp.Plugin.MMPEmbedding; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; using OpenAI.Embeddings; namespace BotSharp.Plugin.MMPEmbedding.Providers; @@ -26,7 +22,7 @@ public class MMPEmbeddingProvider : ITextEmbedding public string Provider => "mmp-embedding"; public string Model => _model; - private static readonly Regex WordRegex = new(@"\b\w+\b", RegexOptions.Compiled); + 
private static readonly Regex _wordRegex = new(@"\b\w+\b", RegexOptions.Compiled); public MMPEmbeddingProvider(IServiceProvider serviceProvider, ILogger logger) { @@ -76,6 +72,31 @@ public async Task> GetVectorsAsync(List texts) return results; } + public void SetDimension(int dimension) + { + _dimension = dimension > 0 ? dimension : DEFAULT_DIMENSION; + } + + public int GetDimension() + { + return _dimension; + } + + public void SetModelName(string model) + { + _model = model; + } + + #region Private methods + /// + /// Sets the underlying provider to use for getting token embeddings + /// + /// Provider name (e.g., "openai", "azure-openai", "deepseek-ai") + public void SetUnderlyingProvider(string provider) + { + _underlyingProvider = provider; + } + /// /// Gets embeddings for individual tokens using the underlying provider /// @@ -113,7 +134,7 @@ private async Task> GetTokenEmbeddingsAsync(List tokens) /// Max pooling: element-wise maximum of all token embeddings /// Result: concatenation of mean and max pooled vectors /// - private float[] MeanMaxPooling(IReadOnlyList vectors, double meanWeight = 0.5, double maxWeight = 0.5) + private float[] MeanMaxPooling(IReadOnlyList vectors, float meanWeight = 0.5f, float maxWeight = 0.5f) { var numTokens = vectors.Count; @@ -128,40 +149,17 @@ private float[] MeanMaxPooling(IReadOnlyList vectors, double meanWeight .ToArray(); return Enumerable.Range(0, _dimension) - .Select(i => (float)meanWeight * meanPooled[i] + (float)maxWeight * maxPooled[i]) + .Select(i => meanWeight * meanPooled[i] + maxWeight * maxPooled[i]) .ToArray(); } - public void SetDimension(int dimension) - { - _dimension = dimension > 0 ? 
dimension : DEFAULT_DIMENSION; - } - - public int GetDimension() - { - return _dimension; - } - - public void SetModelName(string model) - { - _model = model; - } - - /// - /// Sets the underlying provider to use for getting token embeddings - /// - /// Provider name (e.g., "openai", "azure-openai", "deepseek-ai") - public void SetUnderlyingProvider(string provider) - { - _underlyingProvider = provider; - } - /// /// Tokenizes text into individual words /// - public static IEnumerable Tokenize(string text, string? pattern = null) + private static IEnumerable Tokenize(string text, string? pattern = null) { - var patternRegex = string.IsNullOrEmpty(pattern) ? WordRegex : new(pattern, RegexOptions.Compiled); + var patternRegex = !string.IsNullOrEmpty(pattern) ? new(pattern, RegexOptions.Compiled) : _wordRegex; return patternRegex.Matches(text).Cast().Select(m => m.Value); } + #endregion } diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Models/GlobalStatsMongoElement.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/GlobalStatsMongoElement.cs index f2c448b9a..434250973 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Models/GlobalStatsMongoElement.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/GlobalStatsMongoElement.cs @@ -4,6 +4,7 @@ namespace BotSharp.Plugin.MongoStorage.Models; public class StatsCountMongoElement { public long AgentCallCount { get; set; } + public int ImageGenerationTotalCount { get; set; } } [BsonIgnoreExtraElements(Inherited = true)] @@ -13,4 +14,5 @@ public class StatsLlmCostMongoElement public long CompletionTokens { get; set; } public float PromptTotalCost { get; set; } public float CompletionTotalCost { get; set; } + public float ImageGenerationTotalCost { get; set; } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs index aabd45979..872d21f4a 100644 --- 
a/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs @@ -92,8 +92,7 @@ private IMongoCollection GetCollectionOrCreate(string name } #region Indexes - - public void CreateIndexes() + private void CreateIndexes() { // Use Interlocked.CompareExchange to ensure the index is initialized only once, ensuring thread safety // 0 indicates uninitialized, 1 indicates initialized @@ -111,6 +110,7 @@ public void CreateIndexes() CreateAgentCodeScriptIndex(); CreateAgentTaskIndex(); } + private IMongoCollection CreateAgentCodeScriptIndex() { var collection = GetCollectionOrCreate("AgentCodeScripts"); diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Stats.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Stats.cs index 38f53b428..3c245643c 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Stats.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Stats.cs @@ -30,14 +30,16 @@ public partial class MongoRepository AgentId = agentId, Count = new() { - AgentCallCount = found.Count.AgentCallCount + AgentCallCount = found.Count.AgentCallCount, + ImageGenerationTotalCount = found.Count.ImageGenerationTotalCount, }, LlmCost = new() { PromptTokens = found.LlmCost.PromptTokens, CompletionTokens = found.LlmCost.CompletionTokens, PromptTotalCost = found.LlmCost.PromptTotalCost, - CompletionTotalCost = found.LlmCost.CompletionTotalCost + CompletionTotalCost = found.LlmCost.CompletionTotalCost, + ImageGenerationTotalCost = found.LlmCost.ImageGenerationTotalCost }, RecordTime = found.RecordTime, StartTime = startTime, @@ -68,10 +70,12 @@ public bool SaveGlobalStats(BotSharpStatsDelta delta) var updateDef = Builders.Update .SetOnInsert(x => x.Id, Guid.NewGuid().ToString()) .Inc(x => x.Count.AgentCallCount, delta.CountDelta.AgentCallCountDelta) + .Inc(x => x.Count.ImageGenerationTotalCount, 
delta.CountDelta.ImageGenerationTotalCountDelta) .Inc(x => x.LlmCost.PromptTokens, delta.LlmCostDelta.PromptTokensDelta) .Inc(x => x.LlmCost.CompletionTokens, delta.LlmCostDelta.CompletionTokensDelta) .Inc(x => x.LlmCost.PromptTotalCost, delta.LlmCostDelta.PromptTotalCostDelta) .Inc(x => x.LlmCost.CompletionTotalCost, delta.LlmCostDelta.CompletionTotalCostDelta) + .Inc(x => x.LlmCost.ImageGenerationTotalCost, delta.LlmCostDelta.ImageGenerationTotalCostDelta) .Set(x => x.StartTime, startTime) .Set(x => x.EndTime, endTime) .Set(x => x.Interval, delta.Interval) diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Models/Image/ImageGenerationResponse.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Models/Image/ImageGenerationResponse.cs new file mode 100644 index 000000000..5febed36d --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Models/Image/ImageGenerationResponse.cs @@ -0,0 +1,15 @@ +using OpenAI.Images; + +namespace BotSharp.Plugin.OpenAI.Models.Image; + +public class ImageGenerationResponse +{ + [JsonPropertyName("output_format")] + public string? OutputFormat { get; set; } + + [JsonPropertyName("quality")] + public string? Quality { get; set; } + + [JsonPropertyName("size")] + public string? Size { get; set; } +} diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Audio/AudioTranscriptionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Audio/AudioTranscriptionProvider.cs index 006a41b85..b32391fa5 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Audio/AudioTranscriptionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Audio/AudioTranscriptionProvider.cs @@ -1,4 +1,5 @@ using OpenAI.Audio; +using System.Drawing; namespace BotSharp.Plugin.OpenAI.Providers.Audio; @@ -23,21 +24,20 @@ public void SetModelName(string model) public async Task TranscriptTextAsync(Stream audio, string audioFileName, string? 
text = null) { + var settingsService = _services.GetRequiredService(); + var settings = settingsService.GetSetting(Provider, _model); + var audioClient = ProviderHelper.GetClient(Provider, _model, _services) .GetAudioClient(_model); - var options = PrepareTranscriptionOptions(text); + var options = PrepareTranscriptionOptions(text, settings?.Audio?.Transcription); var result = await audioClient.TranscribeAudioAsync(audio, audioFileName, options); return result.Value.Text; } - private AudioTranscriptionOptions PrepareTranscriptionOptions(string? text) + private AudioTranscriptionOptions PrepareTranscriptionOptions(string? text, AudioTranscriptionSetting? settings) { - var settingsService = _services.GetRequiredService(); var state = _services.GetRequiredService(); - - var settings = settingsService.GetSetting(Provider, _model)?.Audio?.Transcription; - var temperature = state.GetState("audio_temperature"); var responseFormat = state.GetState("audio_response_format"); var granularity = state.GetState("audio_granularity"); @@ -46,9 +46,9 @@ private AudioTranscriptionOptions PrepareTranscriptionOptions(string? text) { temperature = $"{settings.Temperature}"; } - - responseFormat = settings?.ResponseFormat != null ? LlmUtility.VerifyModelParameter(responseFormat, settings.ResponseFormat.Default, settings.ResponseFormat.Options) : null; - granularity = settings?.Granularity != null ? 
LlmUtility.VerifyModelParameter(granularity, settings.Granularity.Default, settings.Granularity.Options) : null; + + responseFormat = LlmUtility.GetModelParameter(settings?.Parameters, "ResponseFormat", responseFormat); + granularity = LlmUtility.GetModelParameter(settings?.Parameters, "Granularity", granularity); var options = new AudioTranscriptionOptions { diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs index 859e537fb..cffe6e6b0 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs @@ -1,5 +1,4 @@ #pragma warning disable OPENAI001 -using BotSharp.Abstraction.Hooks; using BotSharp.Abstraction.MessageHub.Models; using BotSharp.Core.Infrastructures.Streams; using BotSharp.Core.MessageHub; @@ -257,8 +256,9 @@ public async Task GetChatCompletionsStreamingAsync(Agent agent, { var text = choice.ContentUpdate[0]?.Text ?? 
string.Empty; textStream.Collect(text); - +#if DEBUG _logger.LogDebug($"Stream Content update: {text}"); +#endif var content = new RoleDialogModel(AgentRole.Assistant, text) { @@ -281,7 +281,9 @@ public async Task GetChatCompletionsStreamingAsync(Agent agent, var args = toolCalls.Where(x => x.FunctionArgumentsUpdate != null).Select(x => x.FunctionArgumentsUpdate.ToString()).ToList(); var functionArguments = string.Join(string.Empty, args); +#if DEBUG _logger.LogDebug($"Tool Call (id: {toolCallId}) => {functionName}({functionArguments})"); +#endif responseMessage = new RoleDialogModel(AgentRole.Function, string.Empty) { @@ -292,10 +294,12 @@ public async Task GetChatCompletionsStreamingAsync(Agent agent, FunctionArgs = functionArguments }; } - else if (choice.FinishReason.HasValue) + else if (choice.FinishReason == ChatFinishReason.Stop) { var allText = textStream.GetText(); +#if DEBUG _logger.LogDebug($"Stream text Content: {allText}"); +#endif responseMessage = new RoleDialogModel(AgentRole.Assistant, allText) { @@ -304,6 +308,18 @@ public async Task GetChatCompletionsStreamingAsync(Agent agent, IsStreaming = true }; } + else if (choice.FinishReason.HasValue) + { + var text = choice.FinishReason == ChatFinishReason.Length ? "Model reached the maximum number of tokens allowed." + : choice.FinishReason == ChatFinishReason.ContentFilter ? "Content is omitted due to content filter rule." 
+ : choice.FinishReason.Value.ToString(); + responseMessage = new RoleDialogModel(AgentRole.Assistant, text) + { + CurrentAgentId = agent.Id, + MessageId = messageId, + IsStreaming = true + }; + } } hub.Push(new() diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Compose.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Compose.cs index 6215ba1af..a90da6091 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Compose.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Compose.cs @@ -13,12 +13,25 @@ public partial class ImageCompletionProvider /// RoleDialogModel containing the composed image(s) public async Task GetImageComposition(Agent agent, RoleDialogModel message, Stream[] images, string[] imageFileNames) { + var contentHooks = _services.GetHooks(agent.Id); + + // Before generating hook + foreach (var hook in contentHooks) + { + await hook.BeforeGenerating(agent, [message]); + } + + var settingsService = _services.GetRequiredService(); + var settings = settingsService.GetSetting(Provider, _model); + var client = ProviderHelper.GetClient(Provider, _model, _services); - var (prompt, imageCount, options) = PrepareEditOptions(message); + var (prompt, imageCount, options) = PrepareEditOptions(message, settings?.Image?.Edit); var imageClient = client.GetImageClient(_model); // Use the new extension method to support multiple images var response = imageClient.GenerateImageEdits(_model, images, imageFileNames, prompt, imageCount, options); + var rawContent = response.GetRawResponse().Content.ToString(); + var responseModel = JsonSerializer.Deserialize(rawContent, BotSharpOptions.defaultJsonOptions); var generatedImageCollection = response.Value; var generatedImages = GetImageGenerations(generatedImageCollection, options.ResponseFormat); @@ -30,6 +43,23 @@ public async Task GetImageComposition(Agent agent, RoleDialogMo GeneratedImages = 
generatedImages }; + // After generating hook + var unitCost = GetImageGenerationUnitCost(settings?.Cost?.ImageCosts, responseModel?.Quality, responseModel?.Size); + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(responseMessage, new TokenStatsModel + { + Prompt = prompt, + Provider = Provider, + Model = _model, + TextInputTokens = generatedImageCollection?.Usage?.InputTokenDetails?.TextTokenCount ?? 0, + ImageInputTokens = generatedImageCollection?.Usage?.InputTokenDetails?.ImageTokenCount ?? 0, + ImageOutputTokens = generatedImageCollection?.Usage?.OutputTokenCount ?? 0, + ImageGenerationCount = imageCount, + ImageGenerationUnitCost = unitCost + }); + } + return await Task.FromResult(responseMessage); } } diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Edit.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Edit.cs index 7a8d44725..e9944394b 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Edit.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Edit.cs @@ -7,11 +7,24 @@ public partial class ImageCompletionProvider { public async Task GetImageEdits(Agent agent, RoleDialogModel message, Stream image, string imageFileName) { + var contentHooks = _services.GetHooks(agent.Id); + + // Before generating hook + foreach (var hook in contentHooks) + { + await hook.BeforeGenerating(agent, [message]); + } + + var settingsService = _services.GetRequiredService(); + var settings = settingsService.GetSetting(Provider, _model); + var client = ProviderHelper.GetClient(Provider, _model, _services); - var (prompt, imageCount, options) = PrepareEditOptions(message); + var (prompt, imageCount, options) = PrepareEditOptions(message, settings?.Image?.Edit); var imageClient = client.GetImageClient(_model); var response = imageClient.GenerateImageEdits(image, imageFileName, prompt, imageCount, options); + var rawContent = 
response.GetRawResponse().Content.ToString(); + var responseModel = JsonSerializer.Deserialize(rawContent, BotSharpOptions.defaultJsonOptions); var images = response.Value; var generatedImages = GetImageGenerations(images, options.ResponseFormat); @@ -23,17 +36,47 @@ public async Task GetImageEdits(Agent agent, RoleDialogModel me GeneratedImages = generatedImages }; + // After generating hook + var unitCost = GetImageGenerationUnitCost(settings?.Cost?.ImageCosts, responseModel?.Quality, responseModel?.Size); + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(responseMessage, new TokenStatsModel + { + Prompt = prompt, + Provider = Provider, + Model = _model, + TextInputTokens = images?.Usage?.InputTokenDetails?.TextTokenCount ?? 0, + ImageInputTokens = images?.Usage?.InputTokenDetails?.ImageTokenCount ?? 0, + ImageOutputTokens = images?.Usage?.OutputTokenCount ?? 0, + ImageGenerationCount = imageCount, + ImageGenerationUnitCost = unitCost + }); + } + return await Task.FromResult(responseMessage); } public async Task GetImageEdits(Agent agent, RoleDialogModel message, Stream image, string imageFileName, Stream mask, string maskFileName) { + var contentHooks = _services.GetHooks(agent.Id); + + // Before generating hook + foreach (var hook in contentHooks) + { + await hook.BeforeGenerating(agent, [message]); + } + + var settingsService = _services.GetRequiredService(); + var settings = settingsService.GetSetting(Provider, _model); + var client = ProviderHelper.GetClient(Provider, _model, _services); - var (prompt, imageCount, options) = PrepareEditOptions(message); + var (prompt, imageCount, options) = PrepareEditOptions(message, settings?.Image?.Edit); var imageClient = client.GetImageClient(_model); var response = imageClient.GenerateImageEdits(image, imageFileName, prompt, mask, maskFileName, imageCount, options); + var rawContent = response.GetRawResponse().Content.ToString(); + var responseModel = JsonSerializer.Deserialize(rawContent, 
BotSharpOptions.defaultJsonOptions); var images = response.Value; var generatedImages = GetImageGenerations(images, options.ResponseFormat); @@ -45,27 +88,40 @@ public async Task GetImageEdits(Agent agent, RoleDialogModel me GeneratedImages = generatedImages }; + // After generating hook + var unitCost = GetImageGenerationUnitCost(settings?.Cost?.ImageCosts, responseModel?.Quality, responseModel?.Size); + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(responseMessage, new TokenStatsModel + { + Prompt = prompt, + Provider = Provider, + Model = _model, + TextInputTokens = images?.Usage?.InputTokenDetails?.TextTokenCount ?? 0, + ImageInputTokens = images?.Usage?.InputTokenDetails?.ImageTokenCount ?? 0, + ImageOutputTokens = images?.Usage?.OutputTokenCount ?? 0, + ImageGenerationCount = imageCount, + ImageGenerationUnitCost = unitCost + }); + } + return await Task.FromResult(responseMessage); } - private (string, int, ImageEditOptions) PrepareEditOptions(RoleDialogModel message) + private (string, int, ImageEditOptions) PrepareEditOptions(RoleDialogModel message, ImageEditSetting? settings) { var prompt = message?.Payload ?? message?.Content ?? string.Empty; - var settingsService = _services.GetRequiredService(); var state = _services.GetRequiredService(); - var size = state.GetState("image_size"); var quality = state.GetState("image_quality"); var responseFormat = state.GetState("image_response_format"); var background = state.GetState("image_background"); - var settings = settingsService.GetSetting(Provider, _model)?.Image?.Edit; - - size = settings?.Size != null ? LlmUtility.VerifyModelParameter(size, settings.Size.Default, settings.Size.Options) : null; - quality = settings?.Quality != null ? LlmUtility.VerifyModelParameter(quality, settings.Quality.Default, settings.Quality.Options) : null; - responseFormat = settings?.ResponseFormat != null ? 
LlmUtility.VerifyModelParameter(responseFormat, settings.ResponseFormat.Default, settings.ResponseFormat.Options) : null; - background = settings?.Background != null ? LlmUtility.VerifyModelParameter(background, settings.Background.Default, settings.Background.Options) : null; + size = LlmUtility.GetModelParameter(settings?.Parameters, "Size", size); + quality = LlmUtility.GetModelParameter(settings?.Parameters, "Quality", quality); + background = LlmUtility.GetModelParameter(settings?.Parameters, "Background", background); + responseFormat = LlmUtility.GetModelParameter(settings?.Parameters, "ResponseFormat", responseFormat); var options = new ImageEditOptions(); if (!string.IsNullOrEmpty(size)) diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Generation.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Generation.cs index c1ad30d16..ad9d70b6b 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Generation.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Generation.cs @@ -1,5 +1,4 @@ #pragma warning disable OPENAI001 -using BotSharp.Abstraction.Hooks; using OpenAI.Images; namespace BotSharp.Plugin.OpenAI.Providers.Image; @@ -8,11 +7,24 @@ public partial class ImageCompletionProvider { public async Task GetImageGeneration(Agent agent, RoleDialogModel message) { + var contentHooks = _services.GetHooks(agent.Id); + + // Before generating hook + foreach (var hook in contentHooks) + { + await hook.BeforeGenerating(agent, [message]); + } + + var settingsService = _services.GetRequiredService(); + var settings = settingsService.GetSetting(Provider, _model); + var client = ProviderHelper.GetClient(Provider, _model, _services); - var (prompt, imageCount, options) = PrepareGenerationOptions(message); + var (prompt, imageCount, options) = PrepareGenerationOptions(message, settings?.Image?.Generation); var imageClient = 
client.GetImageClient(_model); var response = imageClient.GenerateImages(prompt, imageCount, options); + var rawContent = response.GetRawResponse().Content.ToString(); + var responseModel = JsonSerializer.Deserialize(rawContent, BotSharpOptions.defaultJsonOptions); var images = response.Value; var generatedImages = GetImageGenerations(images, options.ResponseFormat); @@ -24,29 +36,42 @@ public async Task GetImageGeneration(Agent agent, RoleDialogMod GeneratedImages = generatedImages }; + // After generating hook + var unitCost = GetImageGenerationUnitCost(settings?.Cost?.ImageCosts, responseModel?.Quality, responseModel?.Size); + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(responseMessage, new TokenStatsModel + { + Prompt = prompt, + Provider = Provider, + Model = _model, + TextInputTokens = images?.Usage?.InputTokenDetails?.TextTokenCount ?? 0, + ImageInputTokens = images?.Usage?.InputTokenDetails?.ImageTokenCount ?? 0, + ImageOutputTokens = images?.Usage?.OutputTokenCount ?? 0, + ImageGenerationCount = imageCount, + ImageGenerationUnitCost = unitCost + }); + } + return await Task.FromResult(responseMessage); } - private (string, int, ImageGenerationOptions) PrepareGenerationOptions(RoleDialogModel message) + private (string, int, ImageGenerationOptions) PrepareGenerationOptions(RoleDialogModel message, ImageGenerationSetting? settings) { var prompt = message?.Payload ?? message?.Content ?? string.Empty; - var settingsService = _services.GetRequiredService(); var state = _services.GetRequiredService(); - var size = state.GetState("image_size"); var quality = state.GetState("image_quality"); var style = state.GetState("image_style"); var responseFormat = state.GetState("image_response_format"); var background = state.GetState("image_background"); - var settings = settingsService.GetSetting(Provider, _model)?.Image?.Generation; - - size = settings?.Size != null ? 
LlmUtility.VerifyModelParameter(size, settings.Size.Default, settings.Size.Options) : null; - quality = settings?.Quality != null ? LlmUtility.VerifyModelParameter(quality, settings.Quality.Default, settings.Quality.Options) : null; - style = settings?.Style != null ? LlmUtility.VerifyModelParameter(style, settings.Style.Default, settings.Style.Options) : null; - responseFormat = settings?.ResponseFormat != null ? LlmUtility.VerifyModelParameter(responseFormat, settings.ResponseFormat.Default, settings.ResponseFormat.Options) : null; - background = settings?.Background != null ? LlmUtility.VerifyModelParameter(background, settings.Background.Default, settings.Background.Options) : null; + size = LlmUtility.GetModelParameter(settings?.Parameters, "Size", size); + quality = LlmUtility.GetModelParameter(settings?.Parameters, "Quality", quality); + style = LlmUtility.GetModelParameter(settings?.Parameters, "Style", style); + background = LlmUtility.GetModelParameter(settings?.Parameters, "Background", background); + responseFormat = LlmUtility.GetModelParameter(settings?.Parameters, "ResponseFormat", responseFormat); var options = new ImageGenerationOptions(); if (!string.IsNullOrEmpty(size)) diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Variation.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Variation.cs index 22d8f2a53..02a46e677 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Variation.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.Variation.cs @@ -1,3 +1,4 @@ +#pragma warning disable OPENAI001 using OpenAI.Images; namespace BotSharp.Plugin.OpenAI.Providers.Image; @@ -6,11 +7,24 @@ public partial class ImageCompletionProvider { public async Task GetImageVariation(Agent agent, RoleDialogModel message, Stream image, string imageFileName) { + var contentHooks = _services.GetHooks(agent.Id); + + // Before generating 
hook + foreach (var hook in contentHooks) + { + await hook.BeforeGenerating(agent, [message]); + } + + var settingsService = _services.GetRequiredService(); + var settings = settingsService.GetSetting(Provider, _model); + var client = ProviderHelper.GetClient(Provider, _model, _services); - var (imageCount, options) = PrepareVariationOptions(); + var (imageCount, options) = PrepareVariationOptions(settings?.Image?.Variation); var imageClient = client.GetImageClient(_model); var response = imageClient.GenerateImageVariations(image, imageFileName, imageCount, options); + var rawContent = response.GetRawResponse().Content.ToString(); + var responseModel = JsonSerializer.Deserialize(rawContent, BotSharpOptions.defaultJsonOptions); var images = response.Value; var generatedImages = GetImageGenerations(images, options.ResponseFormat); @@ -22,21 +36,35 @@ public async Task GetImageVariation(Agent agent, RoleDialogMode GeneratedImages = generatedImages }; + // After generating hook + var unitCost = GetImageGenerationUnitCost(settings?.Cost?.ImageCosts, responseModel?.Quality, responseModel?.Size); + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(responseMessage, new TokenStatsModel + { + Prompt = string.Empty, + Provider = Provider, + Model = _model, + TextInputTokens = images?.Usage?.InputTokenDetails?.TextTokenCount ?? 0, + ImageInputTokens = images?.Usage?.InputTokenDetails?.ImageTokenCount ?? 0, + ImageOutputTokens = images?.Usage?.OutputTokenCount ?? 0, + ImageGenerationCount = imageCount, + ImageGenerationUnitCost = unitCost + }); + } + return await Task.FromResult(responseMessage); } - private (int, ImageVariationOptions) PrepareVariationOptions() + private (int, ImageVariationOptions) PrepareVariationOptions(ImageVariationSetting? 
settings) { - var settingsService = _services.GetRequiredService(); var state = _services.GetRequiredService(); var size = state.GetState("image_size"); var responseFormat = state.GetState("image_response_format"); - var settings = settingsService.GetSetting(Provider, _model)?.Image?.Variation; - - size = settings?.Size != null ? LlmUtility.VerifyModelParameter(size, settings.Size.Default, settings.Size.Options) : null; - responseFormat = settings?.ResponseFormat != null ? LlmUtility.VerifyModelParameter(responseFormat, settings.ResponseFormat.Default, settings.ResponseFormat.Options) : null; + size = LlmUtility.GetModelParameter(settings?.Parameters, "Size", size); + responseFormat = LlmUtility.GetModelParameter(settings?.Parameters, "ResponseFormat", responseFormat); var options = new ImageVariationOptions(); if (!string.IsNullOrEmpty(size)) diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.cs index 919054b1d..6082f8e2d 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Image/ImageCompletionProvider.cs @@ -201,5 +201,25 @@ private int GetImageCount(string count) } return retCount; } + + private float GetImageGenerationUnitCost(IEnumerable? costs, string? quality, string? size) + { + var unitCost = 0f; + if (costs.IsNullOrEmpty()) + { + return unitCost; + } + + if (string.IsNullOrEmpty(quality) || string.IsNullOrEmpty(size)) + { + return unitCost; + } + + var found = costs!.FirstOrDefault(x => x.Attributes != null + && x.Attributes.GetValueOrDefault("Quality", string.Empty).IsEqualTo(quality) + && x.Attributes.GetValueOrDefault("Size", string.Empty).IsEqualTo(size)); + unitCost = found?.Cost ?? 
unitCost; + return unitCost; + } #endregion } diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Using.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Using.cs index 7a111d90a..aa1f3b57c 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Using.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Using.cs @@ -17,6 +17,7 @@ global using BotSharp.Abstraction.Conversations; global using BotSharp.Abstraction.Conversations.Models; global using BotSharp.Abstraction.Loggers; +global using BotSharp.Abstraction.Hooks; global using BotSharp.Abstraction.MLTasks; global using BotSharp.Abstraction.Agents; global using BotSharp.Abstraction.Files; @@ -37,4 +38,5 @@ global using BotSharp.Plugin.OpenAI.Models.Text; global using BotSharp.Plugin.OpenAI.Models.Realtime; +global using BotSharp.Plugin.OpenAI.Models.Image; global using BotSharp.Plugin.OpenAI.Settings; \ No newline at end of file diff --git a/src/WebStarter/appsettings.json b/src/WebStarter/appsettings.json index a97667e9e..03c42e746 100644 --- a/src/WebStarter/appsettings.json +++ b/src/WebStarter/appsettings.json @@ -236,37 +236,43 @@ ], "Image": { "Generation": { - "Size": { - "Default": "1024x1024", - "Options": [ "256x256", "512x512", "1024x1024" ] - }, - "Quality": { - "Default": "standard", - "Options": [ "standard" ] - }, - "ResponseFormat": { - "Default": "bytes", - "Options": [ "url", "bytes" ] + "Parameters": { + "Size": { + "Default": "1024x1024", + "Options": [ "256x256", "512x512", "1024x1024" ] + }, + "Quality": { + "Default": "standard", + "Options": [ "standard" ] + }, + "ResponseFormat": { + "Default": "bytes", + "Options": [ "url", "bytes" ] + } } }, "Edit": { - "Size": { - "Default": "1024x1024", - "Options": [ "256x256", "512x512", "1024x1024" ] - }, - "ResponseFormat": { - "Default": "bytes", - "Options": [ "url", "bytes" ] + "Parameters": { + "Size": { + "Default": "1024x1024", + "Options": [ "256x256", "512x512", "1024x1024" ] + }, + "ResponseFormat": { + "Default": "bytes", + "Options": [ "url", "bytes" ] + } } }, 
"Variation": { - "Size": { - "Default": "1024x1024", - "Options": [ "256x256", "512x512", "1024x1024" ] - }, - "ResponseFormat": { - "Default": "bytes", - "Options": [ "url", "bytes" ] + "Parameters": { + "Size": { + "Default": "1024x1024", + "Options": [ "256x256", "512x512", "1024x1024" ] + }, + "ResponseFormat": { + "Default": "bytes", + "Options": [ "url", "bytes" ] + } } } }, @@ -290,21 +296,23 @@ ], "Image": { "Generation": { - "Size": { - "Default": "1024x1024", - "Options": [ "1024x1024", "1792x1024", "1024x1792" ] - }, - "Quality": { - "Default": "standard", - "Options": [ "standard", "hd", "auto" ] - }, - "Style": { - "Default": "natural", - "Options": [ "natural", "vivid" ] - }, - "ResponseFormat": { - "Default": "bytes", - "Options": [ "url", "bytes" ] + "Parameters": { + "Size": { + "Default": "1024x1024", + "Options": [ "1024x1024", "1792x1024", "1024x1792" ] + }, + "Quality": { + "Default": "standard", + "Options": [ "standard", "hd", "auto" ] + }, + "Style": { + "Default": "natural", + "Options": [ "natural", "vivid" ] + }, + "ResponseFormat": { + "Default": "bytes", + "Options": [ "url", "bytes" ] + } } } }, @@ -328,17 +336,19 @@ ], "Image": { "Edit": { - "Size": { - "Default": "1024x1024", - "Options": [ "1024x1024", "1536x1024", "1024x1536", "auto" ] - }, - "Quality": { - "Default": "medium", - "Options": [ "low", "medium", "high", "auto" ] - }, - "Background": { - "Default": "auto", - "Options": [ "auto", "transparent", "opaque" ] + "Parameters": { + "Size": { + "Default": "1024x1024", + "Options": [ "1024x1024", "1536x1024", "1024x1536", "auto" ] + }, + "Quality": { + "Default": "medium", + "Options": [ "low", "medium", "high", "auto" ] + }, + "Background": { + "Default": "auto", + "Options": [ "auto", "transparent", "opaque" ] + } } } }, @@ -348,7 +358,26 @@ "AudioInputCost": 0, "CachedAudioInputCost": 0, "TextOutputCost": 0.03, - "AudioOutputCost": 0 + "AudioOutputCost": 0, + "ImageInputCost": 0.01, + "CachedImageInputCost": 0.0025, + 
"ImageOutputCost": 0.04, + "ImageCosts": [ + { + "Attributes": { + "Quality": "medium", + "Size": "1024x1024" + }, + "Cost": 0.042 + }, + { + "Attributes": { + "Quality": "high", + "Size": "1024x1024" + }, + "Cost": 0.167 + } + ] } }, { @@ -362,17 +391,19 @@ ], "Image": { "Edit": { - "Size": { - "Default": "1024x1024", - "Options": [ "1024x1024", "1536x1024", "1024x1536", "auto" ] - }, - "Quality": { - "Default": "medium", - "Options": [ "low", "medium", "high", "auto" ] - }, - "Background": { - "Default": "auto", - "Options": [ "auto", "transparent", "opaque" ] + "Parameters": { + "Size": { + "Default": "1024x1024", + "Options": [ "1024x1024", "1536x1024", "1024x1536", "auto" ] + }, + "Quality": { + "Default": "medium", + "Options": [ "low", "medium", "high", "auto" ] + }, + "Background": { + "Default": "auto", + "Options": [ "auto", "transparent", "opaque" ] + } } } }, @@ -382,7 +413,26 @@ "AudioInputCost": 0, "CachedAudioInputCost": 0, "TextOutputCost": 0.03, - "AudioOutputCost": 0 + "AudioOutputCost": 0, + "ImageInputCost": 0.0025, + "CachedImageInputCost": 0.00025, + "ImageOutputCost": 0.008, + "ImageCosts": [ + { + "Attributes": { + "Quality": "medium", + "Size": "1024x1024" + }, + "Cost": 0.011 + }, + { + "Attributes": { + "Quality": "high", + "Size": "1024x1024" + }, + "Cost": 0.036 + } + ] } }, { @@ -842,6 +892,20 @@ "PythonVersion": "3.13.3" }, + "FuzzySharp": { + "Data": { + "BaseDir": "data/tokens", + "Vocabulary": { + "Folder": "vocabulary", + "FileNames": [] + }, + "Synonym": { + "Folder": "synonym", + "FileNames": [] + } + } + }, + "RealtimeModel": { "Provider": "openai", "Model": "gpt-realtime",