-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathfim.go
150 lines (139 loc) · 7.77 KB
/
fim.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
package deepseek
import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
)
// FIMCompletionRequest represents the request body for a Fill-In-the-Middle (FIM)
// completion. Only Model and Prompt are required; all other fields are omitted
// from the JSON payload when left at their zero value.
type FIMCompletionRequest struct {
	Model            string   `json:"model"`                       // Model name to use for completion.
	Prompt           string   `json:"prompt"`                      // The prompt to start the completion from.
	Suffix           string   `json:"suffix,omitempty"`            // Optional: The suffix to complete the prompt with (the text after the "middle").
	MaxTokens        int      `json:"max_tokens,omitempty"`        // Optional: Maximum tokens to generate, > 1 and <= 4000.
	Temperature      float64  `json:"temperature,omitempty"`       // Optional: Sampling temperature, between 0 and 1. NOTE(review): a caller cannot send an explicit 0 — omitempty drops it.
	TopP             float64  `json:"top_p,omitempty"`             // Optional: Nucleus sampling probability threshold.
	N                int      `json:"n,omitempty"`                 // Optional: Number of completions to generate.
	Logprobs         int      `json:"logprobs,omitempty"`          // Optional: Number of log probabilities to return.
	Echo             bool     `json:"echo,omitempty"`              // Optional: Whether to echo the prompt in the completion.
	Stop             []string `json:"stop,omitempty"`              // Optional: List of stop sequences.
	PresencePenalty  float64  `json:"presence_penalty,omitempty"`  // Optional: Penalty for new tokens based on their presence in the text so far.
	FrequencyPenalty float64  `json:"frequency_penalty,omitempty"` // Optional: Penalty for new tokens based on their frequency in the text so far.
}
// FIMCompletionResponse represents the (non-streaming) response body for a
// Fill-In-the-Middle (FIM) completion.
type FIMCompletionResponse struct {
	ID      string `json:"id"`      // Unique ID for the completion.
	Object  string `json:"object"`  // The object type, e.g., "text_completion".
	Created int    `json:"created"` // Unix timestamp (seconds) of when the completion was created.
	Model   string `json:"model"`   // Model used for the completion.
	// Choices holds one entry per requested completion (see FIMCompletionRequest.N).
	Choices []struct {
		Text         string   `json:"text"`          // The generated completion text.
		Index        int      `json:"index"`         // Index of the choice.
		Logprobs     Logprobs `json:"logprobs"`      // Log probabilities of the generated tokens (if requested).
		FinishReason string   `json:"finish_reason"` // Reason for finishing the completion, e.g., "stop", "length".
	} `json:"choices"`
	// Usage reports token accounting for the whole request.
	Usage struct {
		PromptTokens     int `json:"prompt_tokens"`     // Number of tokens in the prompt.
		CompletionTokens int `json:"completion_tokens"` // Number of tokens in the completion.
		TotalTokens      int `json:"total_tokens"`      // Total number of tokens used.
	} `json:"usage"`
}
// FIMStreamCompletionRequest represents the request body for a streaming
// Fill-In-the-Middle (FIM) completion.
// It's similar to FIMCompletionRequest but includes a `Stream` field.
type FIMStreamCompletionRequest struct {
	Model  string `json:"model"`  // Model name to use for completion.
	Prompt string `json:"prompt"` // The prompt to start the completion from.
	Stream bool   `json:"stream"` // Whether to stream the completion. This is the key difference.
	// NOTE(review): `omitempty` has no effect on a non-pointer struct field —
	// encoding/json never treats a struct value as empty, so StreamOptions is
	// always serialized. Making it *StreamOptions would fix this but would
	// break existing callers.
	StreamOptions    StreamOptions `json:"stream_options,omitempty"` // Optional: Options for streaming the completion.
	Suffix           string        `json:"suffix,omitempty"`         // Optional: The suffix to complete the prompt with.
	MaxTokens        int           `json:"max_tokens,omitempty"`     // Optional: Maximum tokens to generate, > 1 and <= 4000.
	Temperature      float64       `json:"temperature,omitempty"`    // Optional: Sampling temperature, between 0 and 1.
	TopP             float64       `json:"top_p,omitempty"`          // Optional: Nucleus sampling probability threshold.
	N                int           `json:"n,omitempty"`              // Optional: Number of completions to generate.
	Logprobs         int           `json:"logprobs,omitempty"`       // Optional: Number of log probabilities to return.
	Echo             bool          `json:"echo,omitempty"`           // Optional: Whether to echo the prompt in the completion.
	Stop             []string      `json:"stop,omitempty"`           // Optional: List of stop sequences.
	PresencePenalty  float64       `json:"presence_penalty,omitempty"`  // Optional: Penalty for new tokens based on their presence in the text so far.
	FrequencyPenalty float64       `json:"frequency_penalty,omitempty"` // Optional: Penalty for new tokens based on their frequency in the text so far.
}
// FIMStreamChoice represents a single choice within a streaming
// Fill-In-the-Middle (FIM) completion response chunk.
type FIMStreamChoice struct {
	// Text generated by the model for this choice (the incremental delta for this chunk).
	Text string `json:"text"`
	// Index of this choice within the list of choices.
	Index int `json:"index"`
	// Log probabilities for the generated tokens (if available). May be `nil`.
	Logprobs Logprobs `json:"logprobs,omitempty"`
	// Reason why the generation finished (e.g., "stop", "length"). `nil` until
	// the final chunk, which is presumably why this is an interface{} rather
	// than a string.
	FinishReason interface{} `json:"finish_reason,omitempty"`
}
// FIMStreamCompletionResponse represents one decoded SSE chunk of a streaming
// Fill-In-the-Middle (FIM) completion.
// It contains metadata about the completion request and a list of choices generated by the model.
type FIMStreamCompletionResponse struct {
	// Unique identifier for the completion response.
	ID string `json:"id"`
	// List of choices generated by the model. Each choice represents a possible completion.
	Choices []FIMStreamChoice `json:"choices"`
	// Unix timestamp (seconds since the epoch) of when the completion was created.
	Created int64 `json:"created"`
	// Name of the model used for the completion.
	Model string `json:"model"`
	// Fingerprint of the system that generated the completion.
	SystemFingerprint string `json:"system_fingerprint"`
	// Type of object returned (always "text_completion" for FIM completions).
	Object string `json:"object"`
	// Usage statistics for the completion request (if available). FIMRecv
	// replaces a nil Usage with an empty StreamUsage, so consumers of that
	// method never see nil here.
	Usage *StreamUsage `json:"usage,omitempty"`
}
// fimCompletionStream is the concrete implementation of FIMChatCompletionStream.
// It owns the HTTP response for the streaming call: FIMRecv reads SSE lines
// from reader, and FIMClose cancels ctx and closes resp.Body.
type fimCompletionStream struct {
	ctx    context.Context    // Context governing the stream's lifetime.
	cancel context.CancelFunc // Cancel function for ctx; invoked by FIMClose.
	resp   *http.Response     // HTTP response from the API call; Body must be closed via FIMClose.
	reader *bufio.Reader      // Buffered reader over resp.Body for line-by-line SSE parsing.
}
// FIMChatCompletionStream is an interface for receiving streaming FIM
// completion responses. Call FIMRecv repeatedly until it returns io.EOF,
// then call FIMClose to release the underlying connection.
type FIMChatCompletionStream interface {
	// FIMRecv returns the next decoded chunk, or io.EOF when the stream ends.
	FIMRecv() (*FIMStreamCompletionResponse, error)
	// FIMClose cancels the stream and closes the underlying response body.
	FIMClose() error
}
// FIMRecv receives the next response chunk from the stream.
//
// It reads Server-Sent Events lines from the buffered response body, skipping
// anything that is not a non-empty "data: " payload, and decodes the payload
// into a FIMStreamCompletionResponse. It returns io.EOF both when the
// underlying stream ends and when the "data: [DONE]" sentinel is received.
// If the stream was canceled (e.g. via FIMClose), the context error is
// returned instead of a read error.
func (s *fimCompletionStream) FIMRecv() (*FIMStreamCompletionResponse, error) {
	for {
		// Respect cancellation: the stored context was previously never
		// consulted, so a canceled stream surfaced only as an opaque read
		// error from the closed body.
		select {
		case <-s.ctx.Done():
			return nil, s.ctx.Err()
		default:
		}

		line, err := s.reader.ReadString('\n') // Read until newline
		if err != nil {
			if err == io.EOF {
				// NOTE(review): any bytes of a final, non-newline-terminated
				// line are discarded here, matching the original behavior.
				return nil, io.EOF
			}
			return nil, fmt.Errorf("error reading stream: %w", err)
		}

		line = strings.TrimSpace(line)
		if line == "data: [DONE]" {
			return nil, io.EOF // End of stream
		}

		// Idiomatic prefix handling instead of manual slicing; an empty
		// payload after "data: " is skipped, as before.
		if !strings.HasPrefix(line, "data: ") {
			continue
		}
		payload := strings.TrimPrefix(line, "data: ")
		if payload == "" {
			continue
		}

		var response FIMStreamCompletionResponse
		if err := json.Unmarshal([]byte(payload), &response); err != nil {
			return nil, fmt.Errorf("unmarshal error: %w, raw data: %s", err, payload)
		}
		// Guarantee a non-nil Usage so callers need not nil-check.
		if response.Usage == nil {
			response.Usage = &StreamUsage{}
		}
		return &response, nil
	}
}
// FIMClose terminates the stream.
//
// It cancels the stream's context and then closes the underlying HTTP
// response body, wrapping and returning any error from the close.
func (s *fimCompletionStream) FIMClose() error {
	s.cancel()
	if err := s.resp.Body.Close(); err != nil {
		return fmt.Errorf("failed to close response body: %w", err)
	}
	return nil
}