Skip to content

Commit 9078be5

Browse files
authored
.Net Fix - OpenAIFilePurpose definitions and handling for OpenAIFileService (#6858)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> File-purpose values other than `assistants` and `fine-tune` result in an exception. ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> 1. Converted `enum` to a string-based `struct`, as an enum is too brittle to handle file-purpose values when new purposes are introduced. 2. Updated test to actually utilize the file-service. 3. Centralized capture of content URI and file-reference id (as metadata) 4. Support filtering on specific file-purpose (`GetFilesAsync`) 5. Added integration test https://platform.openai.com/docs/api-reference/files/create#files-create-purpose https://platform.openai.com/docs/api-reference/files/object#files/object-purpose ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone 😄
1 parent f33d015 commit 9078be5

File tree

8 files changed

+359
-63
lines changed

8 files changed

+359
-63
lines changed

.github/_typos.toml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ extend-exclude = [
1515
"CodeTokenizerTests.cs",
1616
"test_code_tokenizer.py",
1717
"*response.json",
18+
"test_content.txt",
1819
]
1920

2021
[default.extend-words]
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
// Copyright (c) Microsoft. All rights reserved.
2-
using Azure.AI.OpenAI.Assistants;
32
using Microsoft.SemanticKernel;
43
using Microsoft.SemanticKernel.Connectors.OpenAI;
54
using Resources;
65

76
namespace Agents;
87

98
/// <summary>
10-
/// Demonstrate uploading and retrieving files with <see cref="OpenAIFileService"/> .
9+
/// Demonstrates using <see cref="OpenAIFileService"/>.
1110
/// </summary>
1211
public class OpenAIAssistant_FileService(ITestOutputHelper output) : BaseTest(output)
1312
{
@@ -19,7 +18,6 @@ public class OpenAIAssistant_FileService(ITestOutputHelper output) : BaseTest(ou
1918
[Fact]
2019
public async Task UploadAndRetrieveFilesAsync()
2120
{
22-
var openAIClient = new AssistantsClient(TestConfiguration.OpenAI.ApiKey);
2321
OpenAIFileService fileService = new(TestConfiguration.OpenAI.ApiKey);
2422

2523
BinaryContent[] files = [
@@ -29,41 +27,40 @@ public async Task UploadAndRetrieveFilesAsync()
2927
new BinaryContent(data: await EmbeddedResource.ReadAllAsync("travelinfo.txt"), mimeType: "text/plain") { InnerContent = "travelinfo.txt" }
3028
];
3129

32-
var fileIds = new Dictionary<string, BinaryContent>();
33-
foreach (var file in files)
30+
var fileContents = new Dictionary<string, BinaryContent>();
31+
foreach (BinaryContent file in files)
3432
{
35-
var result = await openAIClient.UploadFileAsync(new BinaryData(file.Data), Azure.AI.OpenAI.Assistants.OpenAIFilePurpose.FineTune);
36-
fileIds.Add(result.Value.Id, file);
33+
OpenAIFileReference result = await fileService.UploadContentAsync(file, new(file.InnerContent!.ToString()!, OpenAIFilePurpose.FineTune));
34+
fileContents.Add(result.Id, file);
3735
}
3836

39-
foreach (var file in (await openAIClient.GetFilesAsync(Azure.AI.OpenAI.Assistants.OpenAIFilePurpose.FineTune)).Value)
37+
foreach (OpenAIFileReference fileReference in await fileService.GetFilesAsync(OpenAIFilePurpose.FineTune))
4038
{
41-
if (!fileIds.ContainsKey(file.Id))
39+
// Only interested in the files we uploaded
40+
if (!fileContents.ContainsKey(fileReference.Id))
4241
{
4342
continue;
4443
}
4544

46-
var data = (await openAIClient.GetFileContentAsync(file.Id)).Value;
45+
BinaryContent content = await fileService.GetFileContentAsync(fileReference.Id);
4746

48-
var mimeType = fileIds[file.Id].MimeType;
49-
var fileName = fileIds[file.Id].InnerContent!.ToString();
50-
var metadata = new Dictionary<string, object?> { ["id"] = file.Id };
51-
var uri = new Uri($"https://api.openai.com/v1/files/{file.Id}/content");
52-
var content = mimeType switch
47+
string? mimeType = fileContents[fileReference.Id].MimeType;
48+
string? fileName = fileContents[fileReference.Id].InnerContent!.ToString();
49+
ReadOnlyMemory<byte> data = content.Data ?? new();
50+
51+
var typedContent = mimeType switch
5352
{
54-
"image/jpeg" => new ImageContent(data, mimeType) { Uri = uri, InnerContent = fileName, Metadata = metadata },
55-
"audio/wav" => new AudioContent(data, mimeType) { Uri = uri, InnerContent = fileName, Metadata = metadata },
56-
_ => new BinaryContent(data, mimeType) { Uri = uri, InnerContent = fileName, Metadata = metadata }
53+
"image/jpeg" => new ImageContent(data, mimeType) { Uri = content.Uri, InnerContent = fileName, Metadata = content.Metadata },
54+
"audio/wav" => new AudioContent(data, mimeType) { Uri = content.Uri, InnerContent = fileName, Metadata = content.Metadata },
55+
_ => new BinaryContent(data, mimeType) { Uri = content.Uri, InnerContent = fileName, Metadata = content.Metadata }
5756
};
5857

59-
// Display the the file-name and mime-tyupe for each content type.
60-
Console.WriteLine($"File: {fileName} - {mimeType}");
61-
62-
// Display the each content type-name.
63-
Console.WriteLine($"Type: {content}");
58+
Console.WriteLine($"\nFile: {fileName} - {mimeType}");
59+
Console.WriteLine($"Type: {typedContent}");
60+
Console.WriteLine($"Uri: {typedContent.Uri}");
6461

6562
// Delete the test file remotely
66-
await openAIClient.DeleteFileAsync(file.Id);
63+
await fileService.DeleteFileAsync(fileReference.Id);
6764
}
6865
}
6966
}

dotnet/src/Connectors/Connectors.OpenAI/CompatibilitySuppressions.xml

+56
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<!-- https://learn.microsoft.com/en-us/dotnet/fundamentals/package-validation/diagnostic-ids -->
33
<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
4+
<Suppression>
5+
<DiagnosticId>CP0002</DiagnosticId>
6+
<Target>F:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose.Assistants</Target>
7+
<Left>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
8+
<Right>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
9+
<IsBaselineSuppression>true</IsBaselineSuppression>
10+
</Suppression>
11+
<Suppression>
12+
<DiagnosticId>CP0002</DiagnosticId>
13+
<Target>F:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose.FineTune</Target>
14+
<Left>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
15+
<Right>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
16+
<IsBaselineSuppression>true</IsBaselineSuppression>
17+
</Suppression>
418
<Suppression>
519
<DiagnosticId>CP0002</DiagnosticId>
620
<Target>M:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFileService.GetFileContent(System.String,System.Threading.CancellationToken)</Target>
@@ -29,6 +43,20 @@
2943
<Right>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
3044
<IsBaselineSuppression>true</IsBaselineSuppression>
3145
</Suppression>
46+
<Suppression>
47+
<DiagnosticId>CP0002</DiagnosticId>
48+
<Target>F:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose.Assistants</Target>
49+
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
50+
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
51+
<IsBaselineSuppression>true</IsBaselineSuppression>
52+
</Suppression>
53+
<Suppression>
54+
<DiagnosticId>CP0002</DiagnosticId>
55+
<Target>F:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose.FineTune</Target>
56+
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
57+
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
58+
<IsBaselineSuppression>true</IsBaselineSuppression>
59+
</Suppression>
3260
<Suppression>
3361
<DiagnosticId>CP0002</DiagnosticId>
3462
<Target>M:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFileService.GetFileContent(System.String,System.Threading.CancellationToken)</Target>
@@ -57,4 +85,32 @@
5785
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
5886
<IsBaselineSuppression>true</IsBaselineSuppression>
5987
</Suppression>
88+
<Suppression>
89+
<DiagnosticId>CP0007</DiagnosticId>
90+
<Target>T:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose</Target>
91+
<Left>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
92+
<Right>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
93+
<IsBaselineSuppression>true</IsBaselineSuppression>
94+
</Suppression>
95+
<Suppression>
96+
<DiagnosticId>CP0007</DiagnosticId>
97+
<Target>T:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose</Target>
98+
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
99+
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
100+
<IsBaselineSuppression>true</IsBaselineSuppression>
101+
</Suppression>
102+
<Suppression>
103+
<DiagnosticId>CP0008</DiagnosticId>
104+
<Target>T:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose</Target>
105+
<Left>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
106+
<Right>lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
107+
<IsBaselineSuppression>true</IsBaselineSuppression>
108+
</Suppression>
109+
<Suppression>
110+
<DiagnosticId>CP0008</DiagnosticId>
111+
<Target>T:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAIFilePurpose</Target>
112+
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Left>
113+
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll</Right>
114+
<IsBaselineSuppression>true</IsBaselineSuppression>
115+
</Suppression>
60116
</Suppressions>
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,99 @@
11
// Copyright (c) Microsoft. All rights reserved.
22

3+
using System;
34
using System.Diagnostics.CodeAnalysis;
45

56
namespace Microsoft.SemanticKernel.Connectors.OpenAI;
67

78
/// <summary>
8-
/// Defines the purpose associated with the uploaded file.
9+
/// Defines the purpose associated with the uploaded file:
10+
/// https://platform.openai.com/docs/api-reference/files/object#files/object-purpose
911
/// </summary>
1012
[Experimental("SKEXP0010")]
11-
public enum OpenAIFilePurpose
13+
public readonly struct OpenAIFilePurpose : IEquatable<OpenAIFilePurpose>
1214
{
1315
/// <summary>
14-
/// File to be used by assistants for model processing.
16+
/// File to be used by assistants as input.
1517
/// </summary>
16-
Assistants,
18+
public static OpenAIFilePurpose Assistants { get; } = new("assistants");
1719

1820
/// <summary>
19-
/// File to be used by fine-tuning jobs.
21+
/// File produced as assistants output.
2022
/// </summary>
21-
FineTune,
23+
public static OpenAIFilePurpose AssistantsOutput { get; } = new("assistants_output");
24+
25+
/// <summary>
26+
/// File uploaded as a batch of API requests.
27+
/// </summary>
28+
public static OpenAIFilePurpose Batch { get; } = new("batch");
29+
30+
/// <summary>
31+
/// File produced as result of a file included as a batch request.
32+
/// </summary>
33+
public static OpenAIFilePurpose BatchOutput { get; } = new("batch_output");
34+
35+
/// <summary>
36+
/// File to be used as input to fine-tune a model.
37+
/// </summary>
38+
public static OpenAIFilePurpose FineTune { get; } = new("fine-tune");
39+
40+
/// <summary>
41+
/// File produced as result of fine-tuning a model.
42+
/// </summary>
43+
public static OpenAIFilePurpose FineTuneResults { get; } = new("fine-tune-results");
44+
45+
/// <summary>
46+
/// File to be used for Assistants image file inputs.
47+
/// </summary>
48+
public static OpenAIFilePurpose Vision { get; } = new("vision");
49+
50+
/// <summary>
51+
/// Gets the label associated with this <see cref="OpenAIFilePurpose"/>.
52+
/// </summary>
53+
public string Label { get; }
54+
55+
/// <summary>
56+
/// Creates a new <see cref="OpenAIFilePurpose"/> instance with the provided label.
57+
/// </summary>
58+
/// <param name="label">The label to associate with this <see cref="OpenAIFilePurpose"/>.</param>
59+
public OpenAIFilePurpose(string label)
60+
{
61+
Verify.NotNullOrWhiteSpace(label, nameof(label));
62+
this.Label = label!;
63+
}
64+
65+
/// <summary>
66+
/// Returns a value indicating whether two <see cref="OpenAIFilePurpose"/> instances are equivalent, as determined by a
67+
/// case-insensitive comparison of their labels.
68+
/// </summary>
69+
/// <param name="left"> the first <see cref="OpenAIFilePurpose"/> instance to compare </param>
70+
/// <param name="right"> the second <see cref="OpenAIFilePurpose"/> instance to compare </param>
71+
/// <returns> true if left and right have equivalent labels (compared case-insensitively); false otherwise </returns>
72+
public static bool operator ==(OpenAIFilePurpose left, OpenAIFilePurpose right)
73+
=> left.Equals(right);
74+
75+
/// <summary>
76+
/// Returns a value indicating whether two <see cref="OpenAIFilePurpose"/> instances are not equivalent, as determined by a
77+
/// case-insensitive comparison of their labels.
78+
/// </summary>
79+
/// <param name="left"> the first <see cref="OpenAIFilePurpose"/> instance to compare </param>
80+
/// <param name="right"> the second <see cref="OpenAIFilePurpose"/> instance to compare </param>
81+
/// <returns> false if left and right have equivalent labels (compared case-insensitively); true otherwise </returns>
82+
public static bool operator !=(OpenAIFilePurpose left, OpenAIFilePurpose right)
83+
=> !(left == right);
84+
85+
/// <inheritdoc/>
86+
public override bool Equals([NotNullWhen(true)] object? obj)
87+
=> obj is OpenAIFilePurpose otherPurpose && this == otherPurpose;
88+
89+
/// <inheritdoc/>
90+
public bool Equals(OpenAIFilePurpose other)
91+
=> string.Equals(this.Label, other.Label, StringComparison.OrdinalIgnoreCase);
92+
93+
/// <inheritdoc/>
94+
public override int GetHashCode()
95+
=> StringComparer.OrdinalIgnoreCase.GetHashCode(this.Label);
96+
97+
/// <inheritdoc/>
98+
public override string ToString() => this.Label;
2299
}

dotnet/src/Connectors/Connectors.OpenAI/Files/OpenAIFileService.cs

+22-22
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ public async Task DeleteFileAsync(string id, CancellationToken cancellationToken
112112
public async Task<BinaryContent> GetFileContentAsync(string id, CancellationToken cancellationToken = default)
113113
{
114114
Verify.NotNull(id, nameof(id));
115-
var (stream, mimetype) = await this.StreamGetRequestAsync($"{this._serviceUri}/{id}/content", cancellationToken).ConfigureAwait(false);
115+
var contentUri = $"{this._serviceUri}/{id}/content";
116+
var (stream, mimetype) = await this.StreamGetRequestAsync(contentUri, cancellationToken).ConfigureAwait(false);
116117

117118
using (stream)
118119
{
@@ -123,7 +124,12 @@ public async Task<BinaryContent> GetFileContentAsync(string id, CancellationToke
123124
#else
124125
await stream.CopyToAsync(memoryStream, cancellationToken).ConfigureAwait(false);
125126
#endif
126-
return new BinaryContent(memoryStream.ToArray(), mimetype);
127+
return
128+
new(memoryStream.ToArray(), mimetype)
129+
{
130+
Metadata = new Dictionary<string, object?>() { { "id", id } },
131+
Uri = new Uri(contentUri),
132+
};
127133
}
128134
}
129135

@@ -147,9 +153,19 @@ public async Task<OpenAIFileReference> GetFileAsync(string id, CancellationToken
147153
/// </summary>
148154
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
149155
/// <returns>The metadata of all uploaded files.</returns>
150-
public async Task<IEnumerable<OpenAIFileReference>> GetFilesAsync(CancellationToken cancellationToken = default)
156+
public Task<IEnumerable<OpenAIFileReference>> GetFilesAsync(CancellationToken cancellationToken = default)
157+
=> this.GetFilesAsync(null, cancellationToken);
158+
159+
/// <summary>
160+
/// Retrieve metadata for previously uploaded files, optionally filtered by file purpose.
161+
/// </summary>
162+
/// <param name="filePurpose">The purpose by which to filter the files. When null (or when its label is empty), no purpose filter is applied.</param>
163+
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
164+
/// <returns>The metadata of the uploaded files that match the requested purpose, or of all uploaded files when no purpose is specified.</returns>
165+
public async Task<IEnumerable<OpenAIFileReference>> GetFilesAsync(OpenAIFilePurpose? filePurpose, CancellationToken cancellationToken = default)
151166
{
152-
var result = await this.ExecuteGetRequestAsync<FileInfoList>(this._serviceUri.ToString(), cancellationToken).ConfigureAwait(false);
167+
var serviceUri = filePurpose.HasValue && !string.IsNullOrEmpty(filePurpose.Value.Label) ? $"{this._serviceUri}?purpose={filePurpose}" : this._serviceUri.ToString();
168+
var result = await this.ExecuteGetRequestAsync<FileInfoList>(serviceUri, cancellationToken).ConfigureAwait(false);
153169

154170
return result.Data.Select(this.ConvertFileReference).ToArray();
155171
}
@@ -167,7 +183,7 @@ public async Task<OpenAIFileReference> UploadContentAsync(BinaryContent fileCont
167183
Verify.NotNull(fileContent.Data, nameof(fileContent.Data));
168184

169185
using var formData = new MultipartFormDataContent();
170-
using var contentPurpose = new StringContent(this.ConvertPurpose(settings.Purpose));
186+
using var contentPurpose = new StringContent(settings.Purpose.Label);
171187
using var contentFile = new ByteArrayContent(fileContent.Data.Value.ToArray());
172188
formData.Add(contentPurpose, "purpose");
173189
formData.Add(contentFile, "file", settings.FileName);
@@ -281,26 +297,10 @@ private OpenAIFileReference ConvertFileReference(FileInfo result)
281297
FileName = result.FileName,
282298
CreatedTimestamp = DateTimeOffset.FromUnixTimeSeconds(result.CreatedAt).UtcDateTime,
283299
SizeInBytes = result.Bytes ?? 0,
284-
Purpose = this.ConvertPurpose(result.Purpose),
300+
Purpose = new(result.Purpose),
285301
};
286302
}
287303

288-
private OpenAIFilePurpose ConvertPurpose(string purpose) =>
289-
purpose.ToUpperInvariant() switch
290-
{
291-
"ASSISTANTS" => OpenAIFilePurpose.Assistants,
292-
"FINE-TUNE" => OpenAIFilePurpose.FineTune,
293-
_ => throw new KernelException($"Unknown {nameof(OpenAIFilePurpose)}: {purpose}."),
294-
};
295-
296-
private string ConvertPurpose(OpenAIFilePurpose purpose) =>
297-
purpose switch
298-
{
299-
OpenAIFilePurpose.Assistants => "assistants",
300-
OpenAIFilePurpose.FineTune => "fine-tune",
301-
_ => throw new KernelException($"Unknown {nameof(OpenAIFilePurpose)}: {purpose}."),
302-
};
303-
304304
private sealed class FileInfoList
305305
{
306306
[JsonPropertyName("data")]

0 commit comments

Comments
 (0)