diff --git a/MultiImageClient/Enums/GoogleImageAspectRatio.cs b/MultiImageClient/Enums/GoogleImageAspectRatio.cs new file mode 100644 index 0000000..329f77c --- /dev/null +++ b/MultiImageClient/Enums/GoogleImageAspectRatio.cs @@ -0,0 +1,37 @@ +namespace MultiImageClient +{ + public enum GoogleImageAspectRatio + { + Ratio1x1, + Ratio2x3, + Ratio3x2, + Ratio3x4, + Ratio4x3, + Ratio4x5, + Ratio5x4, + Ratio9x16, + Ratio16x9, + Ratio21x9 + } + + public static class GoogleImageAspectRatioExtensions + { + public static string ToApiString(this GoogleImageAspectRatio ratio) + { + return ratio switch + { + GoogleImageAspectRatio.Ratio1x1 => "1:1", + GoogleImageAspectRatio.Ratio2x3 => "2:3", + GoogleImageAspectRatio.Ratio3x2 => "3:2", + GoogleImageAspectRatio.Ratio3x4 => "3:4", + GoogleImageAspectRatio.Ratio4x3 => "4:3", + GoogleImageAspectRatio.Ratio4x5 => "4:5", + GoogleImageAspectRatio.Ratio5x4 => "5:4", + GoogleImageAspectRatio.Ratio9x16 => "9:16", + GoogleImageAspectRatio.Ratio16x9 => "16:9", + GoogleImageAspectRatio.Ratio21x9 => "21:9", + _ => "1:1" + }; + } + } +} diff --git a/MultiImageClient/Enums/GoogleImageSize.cs b/MultiImageClient/Enums/GoogleImageSize.cs new file mode 100644 index 0000000..6342c1b --- /dev/null +++ b/MultiImageClient/Enums/GoogleImageSize.cs @@ -0,0 +1,23 @@ +namespace MultiImageClient +{ + public enum GoogleImageSize + { + Size1K, + Size2K, + Size4K // Note: 4K only supported by Gemini, not Imagen 4 + } + + public static class GoogleImageSizeExtensions + { + public static string ToApiString(this GoogleImageSize size) + { + return size switch + { + GoogleImageSize.Size1K => "1K", + GoogleImageSize.Size2K => "2K", + GoogleImageSize.Size4K => "4K", + _ => "1K" + }; + } + } +} diff --git a/MultiImageClient/Enums/GoogleOutputMimeType.cs b/MultiImageClient/Enums/GoogleOutputMimeType.cs new file mode 100644 index 0000000..10c9d1d --- /dev/null +++ b/MultiImageClient/Enums/GoogleOutputMimeType.cs @@ -0,0 +1,31 @@ +namespace MultiImageClient +{ + public 
enum GoogleOutputMimeType + { + Png, // Default - lossless + Jpeg // Lossy, smaller file size, supports compression quality + } + + public static class GoogleOutputMimeTypeExtensions + { + public static string ToApiString(this GoogleOutputMimeType mimeType) + { + return mimeType switch + { + GoogleOutputMimeType.Png => "image/png", + GoogleOutputMimeType.Jpeg => "image/jpeg", + _ => "image/png" + }; + } + + public static string ToFileExtension(this GoogleOutputMimeType mimeType) + { + return mimeType switch + { + GoogleOutputMimeType.Png => ".png", + GoogleOutputMimeType.Jpeg => ".jpg", + _ => ".png" + }; + } + } +} diff --git a/MultiImageClient/Enums/GooglePersonGeneration.cs b/MultiImageClient/Enums/GooglePersonGeneration.cs new file mode 100644 index 0000000..c555461 --- /dev/null +++ b/MultiImageClient/Enums/GooglePersonGeneration.cs @@ -0,0 +1,23 @@ +namespace MultiImageClient +{ + public enum GooglePersonGeneration + { + AllowAdult, // Default - allow generation of adults only (no celebrities) + DontAllow, // Disable people/faces in generated images + AllowAll // Allow all person generation (most permissive) + } + + public static class GooglePersonGenerationExtensions + { + public static string ToApiString(this GooglePersonGeneration setting) + { + return setting switch + { + GooglePersonGeneration.AllowAdult => "allow_adult", + GooglePersonGeneration.DontAllow => "dont_allow", + GooglePersonGeneration.AllowAll => "allow_all", // lowercase snake_case, matching the sibling values + _ => "allow_adult" + }; + } + } +} diff --git a/MultiImageClient/Enums/GoogleSafetyFilterLevel.cs b/MultiImageClient/Enums/GoogleSafetyFilterLevel.cs new file mode 100644 index 0000000..076fa38 --- /dev/null +++ b/MultiImageClient/Enums/GoogleSafetyFilterLevel.cs @@ -0,0 +1,25 @@ +namespace MultiImageClient +{ + public enum GoogleSafetyFilterLevel + { + BlockLowAndAbove, // Highest safety - most filtering + BlockMediumAndAbove, // Default - balanced filtering + BlockOnlyHigh, // Lowest safety - least filtering (may increase 
objectionable content) + BlockNone // Disable safety filtering entirely (if supported) + } + + public static class GoogleSafetyFilterLevelExtensions + { + public static string ToApiString(this GoogleSafetyFilterLevel level) + { + return level switch + { + GoogleSafetyFilterLevel.BlockLowAndAbove => "block_low_and_above", + GoogleSafetyFilterLevel.BlockMediumAndAbove => "block_medium_and_above", + GoogleSafetyFilterLevel.BlockOnlyHigh => "block_only_high", + GoogleSafetyFilterLevel.BlockNone => "BLOCK_NONE", + _ => "block_medium_and_above" + }; + } + } +} diff --git a/MultiImageClient/ImageGenerators/GoogleGenerator.cs b/MultiImageClient/ImageGenerators/GoogleGenerator.cs index 09004ee..5f248f0 100644 --- a/MultiImageClient/ImageGenerators/GoogleGenerator.cs +++ b/MultiImageClient/ImageGenerators/GoogleGenerator.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Net.Http; @@ -17,11 +17,19 @@ public class GoogleGenerator : IImageGenerator private MultiClientRunStats _stats; private string _name; private ImageGeneratorApiType _apiType; + private GoogleImageSize _imageSize; + private GoogleImageAspectRatio _aspectRatio; public ImageGeneratorApiType ApiType => ImageGeneratorApiType.GoogleNanoBanana; - public GoogleGenerator(ImageGeneratorApiType apiType, string apiKey, int maxConcurrency, - MultiClientRunStats stats, string name = "") + public GoogleGenerator( + ImageGeneratorApiType apiType, + string apiKey, + int maxConcurrency, + MultiClientRunStats stats, + string name = "", + GoogleImageSize imageSize = GoogleImageSize.Size1K, + GoogleImageAspectRatio aspectRatio = GoogleImageAspectRatio.Ratio1x1) { _apiKey = apiKey; _googleSemaphore = new SemaphoreSlim(maxConcurrency); @@ -29,21 +37,32 @@ public GoogleGenerator(ImageGeneratorApiType apiType, string apiKey, int maxConc _name = string.IsNullOrEmpty(name) ? 
"" : name; _stats = stats; _apiType = apiType; - + _imageSize = imageSize; + _aspectRatio = aspectRatio; } public string GetFilenamePart(PromptDetails pd) { - return $"{_apiType}"; + var namePart = string.IsNullOrEmpty(_name) ? "" : $"-{_name}"; + return $"{_apiType}{namePart}_{_imageSize.ToApiString()}_{_aspectRatio.ToApiString().Replace(":", "x")}"; } public decimal GetCost() { // Gemini 2.5 Flash Image uses token-based pricing // $30 per 1 million tokens for image output (1290 tokens per image up to 1024x1024px) + // Higher resolutions consume proportionally more tokens if (_apiType == ImageGeneratorApiType.GoogleNanoBanana) { - return (30m / 1000000m) * 1290m; + var baseTokens = 1290m; + var multiplier = _imageSize switch + { + GoogleImageSize.Size1K => 1.0m, + GoogleImageSize.Size2K => 4.0m, // 2x2 = 4x pixels + GoogleImageSize.Size4K => 16.0m, // 4x4 = 16x pixels + _ => 1.0m + }; + return (30m / 1000000m) * baseTokens * multiplier; } else if (_apiType == ImageGeneratorApiType.GoogleImagen4) { @@ -57,18 +76,19 @@ public decimal GetCost() public List GetRightParts() { - return new List { _apiType.ToString() }; + var namePart = string.IsNullOrEmpty(_name) ? 
"" : _name; + return new List { _apiType.ToString(), namePart, _imageSize.ToApiString(), _aspectRatio.ToApiString() }; } public string GetGeneratorSpecPart() { if (string.IsNullOrEmpty(_name)) { - return $"google-{_apiType.ToString()}"; + return $"google-{_apiType.ToString()}\n{_imageSize.ToApiString()} {_aspectRatio.ToApiString()}"; } else { - return _name; + return $"{_name}\n{_imageSize.ToApiString()} {_aspectRatio.ToApiString()}"; } } @@ -96,7 +116,12 @@ public async Task ProcessPromptAsync(IImageGenerator generato }, generationConfig = new { - responseModalities = new[] { "TEXT", "IMAGE" } + responseModalities = new[] { "TEXT", "IMAGE" }, + imageConfig = new + { + imageSize = _imageSize.ToApiString(), + aspectRatio = _aspectRatio.ToApiString() + } } }; @@ -214,4 +239,4 @@ public void Dispose() } } -} \ No newline at end of file +} diff --git a/MultiImageClient/ImageGenerators/GoogleImageGenerationOptions.cs b/MultiImageClient/ImageGenerators/GoogleImageGenerationOptions.cs new file mode 100644 index 0000000..34d23a5 --- /dev/null +++ b/MultiImageClient/ImageGenerators/GoogleImageGenerationOptions.cs @@ -0,0 +1,136 @@ +namespace MultiImageClient +{ + /// + /// Configuration options for Google image generation APIs (both Gemini and Imagen 4). + /// Not all options are supported by all APIs - see individual property docs. + /// + public class GoogleImageGenerationOptions + { + /// + /// Output image resolution. Gemini supports 1K/2K/4K, Imagen 4 only supports 1K/2K. + /// Default: Size1K + /// + public GoogleImageSize ImageSize { get; set; } = GoogleImageSize.Size1K; + + /// + /// Aspect ratio of generated images. + /// Supported: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 + /// Default: Ratio1x1 + /// + public GoogleImageAspectRatio AspectRatio { get; set; } = GoogleImageAspectRatio.Ratio1x1; + + /// + /// Controls generation of people/faces in images. + /// Imagen 4 only. 
Default: AllowAdult + /// + public GooglePersonGeneration PersonGeneration { get; set; } = GooglePersonGeneration.AllowAdult; + + /// + /// Safety filter threshold level. + /// Imagen 4 only. Default: BlockMediumAndAbove + /// + public GoogleSafetyFilterLevel SafetyFilterLevel { get; set; } = GoogleSafetyFilterLevel.BlockMediumAndAbove; + + /// + /// Output image format. + /// Imagen 4 only. Default: Png + /// + public GoogleOutputMimeType OutputMimeType { get; set; } = GoogleOutputMimeType.Png; + + /// + /// JPEG compression quality (0-100). Only applies when OutputMimeType is Jpeg. + /// Imagen 4 only. Default: 75 + /// + public int CompressionQuality { get; set; } = 75; + + /// + /// Whether to use LLM-based prompt enhancement for higher quality images. + /// Imagen 4 only. Default: false (to preserve exact prompts) + /// + public bool EnhancePrompt { get; set; } = false; + + /// + /// Whether to add a SynthID digital watermark to generated images. + /// When true, seed parameter is ignored. + /// Imagen 4 only. Default: false + /// + public bool AddWatermark { get; set; } = false; + + /// + /// Random seed for deterministic output. Only works when AddWatermark=false and EnhancePrompt=false. + /// Set to null for random generation. + /// Imagen 4 only. Default: null + /// + public uint? Seed { get; set; } = null; + + /// + /// Number of images to generate per request (1-4). + /// Imagen 4 only. Default: 1 + /// + public int NumberOfImages { get; set; } = 1; + + /// + /// Whether to include RAI (Responsible AI) filter reason in responses. + /// Imagen 4 only. Default: true + /// + public bool IncludeRaiReason { get; set; } = true; + + /// + /// Creates a copy of this options object with potentially modified values. 
+ /// + public GoogleImageGenerationOptions Clone() + { + return new GoogleImageGenerationOptions + { + ImageSize = this.ImageSize, + AspectRatio = this.AspectRatio, + PersonGeneration = this.PersonGeneration, + SafetyFilterLevel = this.SafetyFilterLevel, + OutputMimeType = this.OutputMimeType, + CompressionQuality = this.CompressionQuality, + EnhancePrompt = this.EnhancePrompt, + AddWatermark = this.AddWatermark, + Seed = this.Seed, + NumberOfImages = this.NumberOfImages, + IncludeRaiReason = this.IncludeRaiReason + }; + } + + /// + /// Validates options for Imagen 4 API compatibility. + /// + public void ValidateForImagen4() + { + if (ImageSize == GoogleImageSize.Size4K) + { + throw new System.ArgumentException("Imagen 4 does not support 4K resolution. Use 1K or 2K."); + } + + if (NumberOfImages < 1 || NumberOfImages > 4) + { + throw new System.ArgumentException("NumberOfImages must be between 1 and 4."); + } + + if (CompressionQuality < 0 || CompressionQuality > 100) + { + throw new System.ArgumentException("CompressionQuality must be between 0 and 100."); + } + } + + /// + /// Gets a string representation of the key options for display/logging. + /// + public string ToDisplayString() + { + return $"{ImageSize.ToApiString()} {AspectRatio.ToApiString()}"; + } + + /// + /// Gets a filename-safe string representation of key options. 
+ /// + public string ToFilenamePart() + { + return $"{ImageSize.ToApiString()}_{AspectRatio.ToApiString().Replace(":", "x")}"; + } + } +} diff --git a/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs b/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs index 7d62c3c..4f72972 100644 --- a/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs +++ b/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs @@ -20,24 +20,27 @@ public class GoogleImagen4Generator : IImageGenerator private string _apiKey; private MultiClientRunStats _stats; private string _name; - private string _aspectRatio; - private string _safetyFilterLevel; private string _location; private string _projectId; private string _googleServiceAccountKeyPath; private GoogleCredential _credential; + private GoogleImageGenerationOptions _options; public ImageGeneratorApiType ApiType => ImageGeneratorApiType.GoogleImagen4; - public GoogleImagen4Generator(string apiKey, int maxConcurrency, - MultiClientRunStats stats, string name, - string aspectRatio, - string safetyFilterLevel, + public GoogleImagen4Generator( + string apiKey, + int maxConcurrency, + MultiClientRunStats stats, + string name, string location, string projectId, - string googleServiceAccountKeyPath) - + string googleServiceAccountKeyPath, + GoogleImageGenerationOptions options = null) { + _options = options ?? new GoogleImageGenerationOptions(); + _options.ValidateForImagen4(); + _apiKey = apiKey; _googleSemaphore = new SemaphoreSlim(maxConcurrency); _location = location; @@ -61,37 +64,40 @@ public GoogleImagen4Generator(string apiKey, int maxConcurrency, _name = string.IsNullOrEmpty(name) ? "" : name; _stats = stats; - _aspectRatio = aspectRatio; - _safetyFilterLevel = safetyFilterLevel; } public string GetFilenamePart(PromptDetails pd) { var namePart = string.IsNullOrEmpty(_name) ? 
"" : $"-{_name}"; - return $"google-imagen4{namePart}"; + return $"google-imagen4{namePart}_{_options.ToFilenamePart()}"; } public decimal GetCost() { - // Imagen 4 pricing (higher than Imagen 3) - return 0.04m; + // Imagen 4 pricing - higher resolutions cost more + return _options.ImageSize switch + { + GoogleImageSize.Size1K => 0.04m, + GoogleImageSize.Size2K => 0.08m, + _ => 0.04m + }; } public List GetRightParts() { var namePart = string.IsNullOrEmpty(_name) ? "" : _name; - return new List { "imagen4", namePart }; + return new List { "imagen4", namePart, _options.ImageSize.ToApiString(), _options.AspectRatio.ToApiString() }; } public string GetGeneratorSpecPart() { if (string.IsNullOrEmpty(_name)) { - return "google-imagen4"; + return $"google-imagen4\n{_options.ToDisplayString()}"; } else { - return _name; + return $"{_name}\n{_options.ToDisplayString()}"; } } @@ -102,72 +108,95 @@ public async Task ProcessPromptAsync(IImageGenerator generato { _stats.GoogleRequestCount++; - // Google Gemini API endpoint for Imagen 4 + // Google Vertex AI endpoint for Imagen 4 var apiUrl = $"https://{_location}-aiplatform.googleapis.com/v1/projects/{_projectId}/locations/{_location}/publishers/google/models/imagen-4.0-generate-001:predict"; - // Construct the instance for the predict request - var instance = new Google.Protobuf.WellKnownTypes.Value + // Build the fields dictionary with all options + var fields = new Dictionary + { + { "prompt", Google.Protobuf.WellKnownTypes.Value.ForString(promptDetails.Prompt) }, + { "numberOfImages", Google.Protobuf.WellKnownTypes.Value.ForNumber(_options.NumberOfImages) }, + { "aspectRatio", Google.Protobuf.WellKnownTypes.Value.ForString(_options.AspectRatio.ToApiString()) }, + { "sampleImageSize", Google.Protobuf.WellKnownTypes.Value.ForString(_options.ImageSize.ToApiString()) }, + { "enhancePrompt", Google.Protobuf.WellKnownTypes.Value.ForBool(_options.EnhancePrompt) }, + { "includeRaiReason", 
Google.Protobuf.WellKnownTypes.Value.ForBool(_options.IncludeRaiReason) }, + { "safetySetting", Google.Protobuf.WellKnownTypes.Value.ForString(_options.SafetyFilterLevel.ToApiString()) }, + { "personGeneration", Google.Protobuf.WellKnownTypes.Value.ForString(_options.PersonGeneration.ToApiString()) }, + { "addWatermark", Google.Protobuf.WellKnownTypes.Value.ForBool(_options.AddWatermark) } + }; + + // Add seed if specified and watermark is disabled + if (_options.Seed.HasValue && !_options.AddWatermark && !_options.EnhancePrompt) + { + fields.Add("seed", Google.Protobuf.WellKnownTypes.Value.ForNumber(_options.Seed.Value)); + } + + // Add output options if not using defaults + if (_options.OutputMimeType != GoogleOutputMimeType.Png) { - StructValue = new Google.Protobuf.WellKnownTypes.Struct + var outputOptions = new Google.Protobuf.WellKnownTypes.Struct(); + outputOptions.Fields.Add("mimeType", Google.Protobuf.WellKnownTypes.Value.ForString(_options.OutputMimeType.ToApiString())); + + if (_options.OutputMimeType == GoogleOutputMimeType.Jpeg) { - Fields = - { - { "prompt", Google.Protobuf.WellKnownTypes.Value.ForString(promptDetails.Prompt) }, - { "numberOfImages", Google.Protobuf.WellKnownTypes.Value.ForNumber(1) }, - { "aspectRatio", Google.Protobuf.WellKnownTypes.Value.ForString(_aspectRatio) }, - { "enhancePrompt", Google.Protobuf.WellKnownTypes.Value.ForBool(false) }, - { "includeRaiReason", Google.Protobuf.WellKnownTypes.Value.ForBool(true) }, - { "safetyFilterLevel", Google.Protobuf.WellKnownTypes.Value.ForString(_safetyFilterLevel) }, - { "safetySetting", Google.Protobuf.WellKnownTypes.Value.ForString("block_only_high") }, - { "personGeneration", Google.Protobuf.WellKnownTypes.Value.ForString("ALLOW_ALL") }, - { "addWatermark", Google.Protobuf.WellKnownTypes.Value.ForBool(false) } - } + outputOptions.Fields.Add("compressionQuality", Google.Protobuf.WellKnownTypes.Value.ForNumber(_options.CompressionQuality)); } + + fields.Add("outputOptions", 
Google.Protobuf.WellKnownTypes.Value.ForStruct(outputOptions)); + } + + // Construct the instance for the predict request + var instanceStruct = new Google.Protobuf.WellKnownTypes.Struct(); + foreach (var field in fields) + { + instanceStruct.Fields.Add(field.Key, field.Value); + } + + var instance = new Google.Protobuf.WellKnownTypes.Value + { + StructValue = instanceStruct }; var instances = new List { instance }; - - // Imagen 4 does not use a separate 'config' field in parameters. - // Parameters are directly specified in the instance. - var parameters = new Google.Protobuf.WellKnownTypes.Value(); // No parameters needed for now. + var parameters = new Google.Protobuf.WellKnownTypes.Value(); var endpoint = EndpointName.FromProjectLocationPublisherModel(_projectId, _location, "google", "imagen-4.0-generate-001"); var response = await _predictionServiceClient.PredictAsync(endpoint, instances, parameters); var base64Images = new List(); - string commonMimeType = "image/png"; // Default or first detected mime type + string commonMimeType = _options.OutputMimeType.ToApiString(); if (response?.Predictions != null && response.Predictions.Any()) { foreach (var prediction in response.Predictions) + { + if (prediction?.StructValue?.Fields != null) { - if (prediction?.StructValue?.Fields != null) + var predictionFields = prediction.StructValue.Fields; + if (predictionFields.ContainsKey("bytesBase64Encoded") && predictionFields.ContainsKey("mimeType")) { - var predictionFields = prediction.StructValue.Fields; - if (predictionFields.ContainsKey("bytesBase64Encoded") && predictionFields.ContainsKey("mimeType")) - { - var imageData = predictionFields["bytesBase64Encoded"].StringValue; - var newPrompt = predictionFields["prompt"].StringValue; - var currentMimeType = predictionFields["mimeType"].StringValue; + var imageData = predictionFields["bytesBase64Encoded"].StringValue; + var newPrompt = predictionFields.ContainsKey("prompt") ? 
predictionFields["prompt"].StringValue : promptDetails.Prompt; + var currentMimeType = predictionFields["mimeType"].StringValue; - if (!string.IsNullOrEmpty(imageData)) + if (!string.IsNullOrEmpty(imageData)) + { + var bd = new CreatedBase64Image { - var bd = new CreatedBase64Image - { - bytesBase64 = imageData, - newPrompt = newPrompt, - }; + bytesBase64 = imageData, + newPrompt = newPrompt, + }; - base64Images.Add(bd); - if (!string.IsNullOrEmpty(currentMimeType) && (commonMimeType == "image/png")) - { - commonMimeType = currentMimeType; // Use the first valid mime type found if default - } + base64Images.Add(bd); + if (!string.IsNullOrEmpty(currentMimeType)) + { + commonMimeType = currentMimeType; } } } } + } } if (base64Images.Count == 0) diff --git a/MultiImageClient/Workflows/GeneratorGroups.cs b/MultiImageClient/Workflows/GeneratorGroups.cs index 92dd2e9..eec569a 100644 --- a/MultiImageClient/Workflows/GeneratorGroups.cs +++ b/MultiImageClient/Workflows/GeneratorGroups.cs @@ -1,4 +1,4 @@ -using IdeogramAPIClient; +using IdeogramAPIClient; using OpenAI.Images; @@ -63,8 +63,53 @@ public IEnumerable GetAll() //var myGenerators = new List() { dalle3, ideogram2, bfl1, bfl2, bfl3, recraft6, ideogram4, }; //var myGenerators = new List() { dalle3, recraft1, recraft2, recraft3, recraft4, recraft5, recraft6, ideogram1, ideogram2, bfl1, bfl2 }; - var google_banana = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats); - var googleimagen = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "", "2:5", "BLOCK_NONE", location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath); + // Google Gemini/Nano Banana generators with various resolutions + var google_banana = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, + imageSize: 
GoogleImageSize.Size1K, aspectRatio: GoogleImageAspectRatio.Ratio1x1); + var google_banana_2k = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, + name: "banana-2k", imageSize: GoogleImageSize.Size2K, aspectRatio: GoogleImageAspectRatio.Ratio16x9); + var google_banana_4k = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, + name: "banana-4k", imageSize: GoogleImageSize.Size4K, aspectRatio: GoogleImageAspectRatio.Ratio1x1); + + // Google Imagen 4 generators (max 2K, no 4K support) - using options class + var imagen4Options1k = new GoogleImageGenerationOptions + { + ImageSize = GoogleImageSize.Size1K, + AspectRatio = GoogleImageAspectRatio.Ratio16x9, + SafetyFilterLevel = GoogleSafetyFilterLevel.BlockNone, + PersonGeneration = GooglePersonGeneration.AllowAll + }; + var googleimagen = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "", + location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + options: imagen4Options1k); + + var imagen4Options2k = new GoogleImageGenerationOptions + { + ImageSize = GoogleImageSize.Size2K, + AspectRatio = GoogleImageAspectRatio.Ratio16x9, + SafetyFilterLevel = GoogleSafetyFilterLevel.BlockNone, + PersonGeneration = GooglePersonGeneration.AllowAll + }; + var googleimagen_2k = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "imagen4-2k", + location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + options: imagen4Options2k); + + // Example with JPEG output and deterministic seed + var imagen4OptionsJpeg = new GoogleImageGenerationOptions + { + ImageSize = GoogleImageSize.Size2K, + AspectRatio = GoogleImageAspectRatio.Ratio3x2, + OutputMimeType = 
GoogleOutputMimeType.Jpeg, + CompressionQuality = 90, + SafetyFilterLevel = GoogleSafetyFilterLevel.BlockOnlyHigh, + Seed = 12345 // Deterministic output + }; + var googleimagen_jpeg = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "imagen4-jpeg", + location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + options: imagen4OptionsJpeg); //recraft8, recraft9, var myGenerators = new List() { }; diff --git a/docs/GoogleImagenResolutionImplementationPlan.md b/docs/GoogleImagenResolutionImplementationPlan.md new file mode 100644 index 0000000..7d94bff --- /dev/null +++ b/docs/GoogleImagenResolutionImplementationPlan.md @@ -0,0 +1,325 @@ +# Implementation Plan: Google Imagen Resolution Support + +## Summary + +Add support for 1K, 2K, and 4K resolution options to both Google image generators in the codebase: +- `GoogleGenerator` (Gemini/Nano Banana) +- `GoogleImagen4Generator` (Vertex AI Imagen 4) + +## Existing Pattern Analysis + +Looking at how other generators handle size/resolution: + +| Generator | Size Parameter | Type | +|-----------|---------------|------| +| `RecraftGenerator` | `RecraftImageSize` | Enum with pixel dimensions | +| `IdeogramGenerator` | `IdeogramAspectRatio` | Enum with aspect ratios | +| `BFLGenerator` | `aspectRatio`, `width`, `height` | String + ints | +| `GptImageOneGenerator` | `size` | String ("1024x1024") | +| `Dalle3Generator` | `GeneratedImageSize` | OpenAI SDK enum | + +The Google APIs use simple string values ("1K", "2K", "4K"), so we should create a clean enum for type safety. 
+ +--- + +## Implementation Steps + +### Step 1: Create GoogleImageSize Enum + +**File:** `MultiImageClient/Enums/GoogleImageSize.cs` + +```csharp +namespace MultiImageClient +{ + public enum GoogleImageSize + { + Size1K, + Size2K, + Size4K // Note: 4K only supported by Gemini, not Imagen 4 + } + + public static class GoogleImageSizeExtensions + { + public static string ToApiString(this GoogleImageSize size) + { + return size switch + { + GoogleImageSize.Size1K => "1K", + GoogleImageSize.Size2K => "2K", + GoogleImageSize.Size4K => "4K", + _ => "1K" + }; + } + } +} +``` + +### Step 2: Update GoogleGenerator (Gemini/Nano Banana) + +**File:** `MultiImageClient/ImageGenerators/GoogleGenerator.cs` + +Changes needed: +1. Add `_imageSize` and `_aspectRatio` fields +2. Update constructor to accept these parameters +3. Modify the request body to include `imageConfig` +4. Update pricing based on resolution + +```csharp +public class GoogleGenerator : IImageGenerator +{ + private SemaphoreSlim _googleSemaphore; + private HttpClient _httpClient; + private string _apiKey; + private MultiClientRunStats _stats; + private string _name; + private ImageGeneratorApiType _apiType; + private GoogleImageSize _imageSize; // NEW + private string _aspectRatio; // NEW + + public GoogleGenerator( + ImageGeneratorApiType apiType, + string apiKey, + int maxConcurrency, + MultiClientRunStats stats, + string name = "", + GoogleImageSize imageSize = GoogleImageSize.Size1K, // NEW + string aspectRatio = "1:1") // NEW + { + _apiKey = apiKey; + _googleSemaphore = new SemaphoreSlim(maxConcurrency); + _httpClient = new HttpClient(); + _name = string.IsNullOrEmpty(name) ? 
"" : name; + _stats = stats; + _apiType = apiType; + _imageSize = imageSize; // NEW + _aspectRatio = aspectRatio; // NEW + } + + // In ProcessPromptAsync, update the request body: + var requestBody = new + { + contents = new[] + { + new + { + parts = new[] + { + new { text = promptDetails.Prompt } + } + } + }, + generationConfig = new + { + responseModalities = new[] { "TEXT", "IMAGE" }, + imageConfig = new // NEW + { + imageSize = _imageSize.ToApiString(), + aspectRatio = _aspectRatio + } + } + }; + + // Update GetCost() to account for resolution + public decimal GetCost() + { + if (_apiType == ImageGeneratorApiType.GoogleNanoBanana) + { + // Higher resolution = more tokens + var baseTokens = 1290m; + var multiplier = _imageSize switch + { + GoogleImageSize.Size1K => 1.0m, + GoogleImageSize.Size2K => 4.0m, // 2x2 = 4x pixels + GoogleImageSize.Size4K => 16.0m, // 4x4 = 16x pixels + _ => 1.0m + }; + return (30m / 1000000m) * baseTokens * multiplier; + } + // ... rest + } + + // Update GetFilenamePart and GetGeneratorSpecPart to include resolution + public string GetFilenamePart(PromptDetails pd) + { + return $"{_apiType}_{_imageSize.ToApiString()}_{_aspectRatio.Replace(":", "x")}"; + } +} +``` + +### Step 3: Update GoogleImagen4Generator + +**File:** `MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs` + +Changes needed: +1. Add `_imageSize` field (only supports 1K and 2K) +2. Update constructor +3. Modify the request to include `sampleImageSize` + +```csharp +public class GoogleImagen4Generator : IImageGenerator +{ + // ... existing fields ... + private GoogleImageSize _imageSize; // NEW + + public GoogleImagen4Generator( + string apiKey, + int maxConcurrency, + MultiClientRunStats stats, + string name, + string aspectRatio, + string safetyFilterLevel, + string location, + string projectId, + string googleServiceAccountKeyPath, + GoogleImageSize imageSize = GoogleImageSize.Size1K) // NEW + { + // ... existing initialization ... 
+ + // Validate: Imagen 4 doesn't support 4K + if (imageSize == GoogleImageSize.Size4K) + { + throw new ArgumentException("Imagen 4 does not support 4K resolution. Use 1K or 2K."); + } + _imageSize = imageSize; + } + + public async Task ProcessPromptAsync(...) + { + // Update the instance to include sampleImageSize: + var instance = new Google.Protobuf.WellKnownTypes.Value + { + StructValue = new Google.Protobuf.WellKnownTypes.Struct + { + Fields = + { + { "prompt", Google.Protobuf.WellKnownTypes.Value.ForString(promptDetails.Prompt) }, + { "numberOfImages", Google.Protobuf.WellKnownTypes.Value.ForNumber(1) }, + { "aspectRatio", Google.Protobuf.WellKnownTypes.Value.ForString(_aspectRatio) }, + { "sampleImageSize", Google.Protobuf.WellKnownTypes.Value.ForString(_imageSize.ToApiString()) }, // NEW + // ... other fields ... + } + } + }; + // ... + } + + // Update pricing based on resolution + public decimal GetCost() + { + return _imageSize switch + { + GoogleImageSize.Size1K => 0.04m, + GoogleImageSize.Size2K => 0.08m, // Estimated - higher res likely costs more + _ => 0.04m + }; + } +} +``` + +### Step 4: Update GeneratorGroups.cs + +**File:** `MultiImageClient/Workflows/GeneratorGroups.cs` + +```csharp +// Example usage with different resolutions: + +// Gemini/Nano Banana with various resolutions +var google_banana_1k = new GoogleGenerator( + ImageGeneratorApiType.GoogleNanoBanana, + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "banana-1k", + imageSize: GoogleImageSize.Size1K, + aspectRatio: "16:9"); + +var google_banana_2k = new GoogleGenerator( + ImageGeneratorApiType.GoogleNanoBanana, + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "banana-2k", + imageSize: GoogleImageSize.Size2K, + aspectRatio: "16:9"); + +var google_banana_4k = new GoogleGenerator( + ImageGeneratorApiType.GoogleNanoBanana, + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "banana-4k", + imageSize: GoogleImageSize.Size4K, + aspectRatio: 
"1:1"); + +// Imagen 4 with various resolutions (no 4K support) +var googleimagen_1k = new GoogleImagen4Generator( + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "imagen4-1k", + aspectRatio: "16:9", + safetyFilterLevel: "BLOCK_NONE", + location: _settings.GoogleCloudLocation, + projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + imageSize: GoogleImageSize.Size1K); + +var googleimagen_2k = new GoogleImagen4Generator( + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "imagen4-2k", + aspectRatio: "16:9", + safetyFilterLevel: "BLOCK_NONE", + location: _settings.GoogleCloudLocation, + projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + imageSize: GoogleImageSize.Size2K); +``` + +--- + +## Files to Create/Modify + +| File | Action | Description | +|------|--------|-------------| +| `MultiImageClient/Enums/GoogleImageSize.cs` | **CREATE** | New enum for resolution options | +| `MultiImageClient/ImageGenerators/GoogleGenerator.cs` | MODIFY | Add imageSize & aspectRatio params | +| `MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs` | MODIFY | Add sampleImageSize param | +| `MultiImageClient/Workflows/GeneratorGroups.cs` | MODIFY | Update instantiation examples | + +--- + +## Testing Checklist + +- [ ] Verify 1K resolution works for GoogleGenerator (Gemini) +- [ ] Verify 2K resolution works for GoogleGenerator (Gemini) +- [ ] Verify 4K resolution works for GoogleGenerator (Gemini) +- [ ] Verify 1K resolution works for GoogleImagen4Generator +- [ ] Verify 2K resolution works for GoogleImagen4Generator +- [ ] Verify 4K throws appropriate error for GoogleImagen4Generator +- [ ] Verify aspect ratio combinations work with different resolutions +- [ ] Verify cost calculations are updated appropriately +- [ ] Verify file naming includes resolution info + +--- + +## Comparison with Other Generators + +This 
implementation follows established patterns: + +| Feature | GoogleGenerator | RecraftGenerator | IdeogramGenerator | +|---------|----------------|------------------|-------------------| +| Size enum | `GoogleImageSize` | `RecraftImageSize` | N/A (aspect only) | +| Aspect param | String | N/A (part of size) | Enum | +| Default size | 1K | 1024x1024 | N/A | +| Max size | 4K (Gemini) | 2048x1024 | 1536x1536 | + +--- + +## Notes + +1. **4K Limitation**: Only Gemini supports 4K; Imagen 4 maxes out at 2K +2. **Pricing**: Higher resolutions will increase costs - exact pricing TBD +3. **Quality vs Speed**: Larger images take longer to generate +4. **Aspect Ratio Interaction**: Resolution constrains the longer dimension while aspect ratio is maintained diff --git a/docs/GoogleImagenResolutionOptions.md b/docs/GoogleImagenResolutionOptions.md new file mode 100644 index 0000000..7eee4fe --- /dev/null +++ b/docs/GoogleImagenResolutionOptions.md @@ -0,0 +1,153 @@ +# Google Imagen API Options Reference + +## Overview + +Google provides two distinct APIs for image generation, each with different options: + +1. **Google Imagen 4 (Vertex AI)** - Dedicated image generation model accessed via Vertex AI +2. 
**Google Gemini API (Nano Banana)** - Multimodal LLM with native image generation + +--- + +## All Available Parameters + +### Imagen 4 (Vertex AI) Parameters + +| Parameter | Type | Values | Default | Description | +|-----------|------|--------|---------|-------------| +| `sampleImageSize` | string | `"1K"`, `"2K"` | `"1K"` | Output resolution (4K NOT supported) | +| `aspectRatio` | string | `"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"` | `"1:1"` | Image aspect ratio | +| `sampleCount` | int | 1-4 | 1 | Number of images to generate (`numberOfImages` in the Gen AI SDK) | +| `enhancePrompt` | boolean | true/false | true | LLM-based prompt rewriting | +| `personGeneration` | string | `"allow_adult"`, `"dont_allow"`, `"ALLOW_ALL"` | `"allow_adult"` | Controls person/face generation | +| `safetySetting` | string | `"block_low_and_above"`, `"block_medium_and_above"`, `"block_only_high"` | `"block_medium_and_above"` | Safety filter threshold | +| `addWatermark` | boolean | true/false | false | Add SynthID digital watermark | +| `seed` | uint32 | any | random | Deterministic generation (only when addWatermark=false and enhancePrompt=false) | +| `includeRaiReason` | boolean | true/false | true | Include RAI filter reason in response | +| `outputOptions.mimeType` | string | `"image/png"`, `"image/jpeg"` | `"image/png"` | Output format | +| `outputOptions.compressionQuality` | int | 0-100 | 75 | JPEG compression (only for JPEG) | + +### Gemini API Parameters + +| Parameter | Type | Values | Default | Description | +|-----------|------|--------|---------|-------------| +| `imageConfig.imageSize` | string | `"1K"`, `"2K"`, `"4K"` | `"1K"` | Output resolution | +| `imageConfig.aspectRatio` | string | `"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"9:16"`, `"16:9"`, `"21:9"` | auto | Image aspect ratio | + +--- + +## Resolution Details + +| Setting | Approximate Resolution | Imagen 4 | Gemini | +|---------|----------------------|----------|--------| +| 1K | ~1024 × 
1024 (for 1:1) | ✅ | ✅ | +| 2K | ~2048 × 2048 (for 1:1) | ✅ | ✅ | +| 4K | ~4096 × 4096 (for 1:1) | ❌ | ✅ | + +For non-square aspect ratios, the longer dimension is constrained to the K value. + +--- + +## Person Generation Settings + +| Value | Description | +|-------|-------------| +| `allow_adult` | Default. Allow generation of adults only. Celebrity generation is blocked. | +| `dont_allow` | Disable all people/faces in generated images | +| `ALLOW_ALL` | Most permissive - allows all person generation | + +--- + +## Safety Filter Levels + +| Value | Description | +|-------|-------------| +| `block_low_and_above` | Highest safety - most filtering, fewest images pass | +| `block_medium_and_above` | Default - balanced filtering | +| `block_only_high` | Lowest safety - least filtering, may increase objectionable content | + +--- + +## Output Format Options + +| Format | Pros | Cons | +|--------|------|------| +| PNG | Lossless, supports transparency | Larger file size | +| JPEG | Smaller file size, configurable quality | Lossy compression, no transparency | + +JPEG compression quality: 0 (smallest/worst) to 100 (largest/best), default 75. 
+ +--- + +## API Request Examples + +### Imagen 4 (Full Options) + +```json +{ + "instances": [ + { + "prompt": "A serene mountain landscape at sunset" + } + ], + "parameters": { + "sampleImageSize": "2K", + "sampleCount": 1, + "aspectRatio": "16:9", + "enhancePrompt": false, + "personGeneration": "allow_adult", + "safetySetting": "block_only_high", + "addWatermark": false, + "seed": 12345, + "includeRaiReason": true, + "outputOptions": { + "mimeType": "image/jpeg", + "compressionQuality": 90 + } + } +} +``` + +### Gemini API + +```json +{ + "contents": [ + { + "parts": [ + { "text": "A serene mountain landscape at sunset" } + ] + } + ], + "generationConfig": { + "responseModalities": ["TEXT", "IMAGE"], + "imageConfig": { + "imageSize": "4K", + "aspectRatio": "16:9" + } + } +} +``` + +--- + +## Pricing Considerations + +### Imagen 4 +- Base price: ~$0.04 per image at 1K +- 2K resolution: ~$0.08 per image (estimated 2x) + +### Gemini (Token-based) +- $30 per 1M output tokens +- ~1290 tokens per 1K image +- Higher resolutions consume more tokens in proportion to pixel count (4x for 2K, 16x for 4K, relative to 1K) + +--- + +## References + +- [Vertex AI Image Generation](https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-images) +- [Imagen API Reference](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api) +- [Set Output Resolution](https://cloud.google.com/vertex-ai/generative-ai/docs/image/set-output-resolution) +- [Configure Safety Settings](https://cloud.google.com/vertex-ai/generative-ai/docs/image/configure-responsible-ai-safety-settings) +- [Gemini API Image Generation](https://ai.google.dev/gemini-api/docs/image-generation)