Skip to content

Commit 7dfddc7

Browse files
committed
centralized request middleware
Signed-off-by: Dave Lee <[email protected]>
1 parent 5f130fe commit 7dfddc7

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+904
-679
lines changed

Diff for: Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
88
# llama.cpp versions
99
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
1010
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
11-
CPPLLAMA_VERSION?=a89f75e1b7b90cb2d4d4c52ca53ef9e9b466aa45
11+
CPPLLAMA_VERSION?=9e041024481f6b249ab8918e18b9477f873b5a5e
1212

1313
# go-rwkv version
1414
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp

Diff for: backend/python/autogptq/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
accelerate
22
auto-gptq==0.7.1
3-
grpcio==1.66.2
3+
grpcio==1.67.0
44
protobuf
55
certifi
66
transformers

Diff for: backend/python/bark/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
bark==0.1.5
2-
grpcio==1.66.2
2+
grpcio==1.67.0
33
protobuf
44
certifi

Diff for: backend/python/common/template/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf

Diff for: backend/python/coqui/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
coqui-tts
2-
grpcio==1.66.2
2+
grpcio==1.67.0
33
protobuf
44
certifi

Diff for: backend/python/diffusers/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
setuptools
2-
grpcio==1.66.2
2+
grpcio==1.67.0
33
pillow
44
protobuf
55
certifi

Diff for: backend/python/exllama2/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
certifi
44
wheel

Diff for: backend/python/mamba/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
certifi

Diff for: backend/python/openvoice/requirements-intel.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
intel-extension-for-pytorch
33
torch
44
optimum[openvino]
5-
grpcio==1.66.2
5+
grpcio==1.67.0
66
protobuf
77
librosa==0.9.1
88
faster-whisper==1.0.3

Diff for: backend/python/openvoice/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
librosa
44
faster-whisper

Diff for: backend/python/parler-tts/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
certifi
44
llvmlite==0.43.0

Diff for: backend/python/rerankers/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
certifi

Diff for: backend/python/sentencetransformers/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
certifi
44
datasets
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
scipy==1.14.0
44
certifi

Diff for: backend/python/transformers/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
certifi
44
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

Diff for: backend/python/vall-e-x/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
certifi

Diff for: backend/python/vllm/install.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
2222
git clone https://github.com/vllm-project/vllm
2323
fi
2424
pushd vllm
25-
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
25+
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.0 protobuf bitsandbytes
2626
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
2727
VLLM_TARGET_DEVICE=cpu python setup.py install
2828
popd

Diff for: backend/python/vllm/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
grpcio==1.66.2
1+
grpcio==1.67.0
22
protobuf
33
certifi
44
setuptools

Diff for: core/backend/llm.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@ type TokenUsage struct {
3131
Completion int
3232
}
3333

34-
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
34+
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
3535
modelFile := c.Model
3636

3737
var inferenceModel grpc.Backend
3838
var err error
3939

40-
opts := ModelOptions(c, o, []model.Option{})
40+
opts := ModelOptions(*c, o, []model.Option{})
4141

4242
if c.Backend != "" {
4343
opts = append(opts, model.WithBackendString(c.Backend))
@@ -85,7 +85,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
8585

8686
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
8787
fn := func() (LLMResponse, error) {
88-
opts := gRPCPredictOpts(c, loader.ModelPath)
88+
opts := gRPCPredictOpts(*c, loader.ModelPath)
8989
opts.Prompt = s
9090
opts.Messages = protoMessages
9191
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate

Diff for: core/backend/tokenize.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@ import (
77
model "github.com/mudler/LocalAI/pkg/model"
88
)
99

10-
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
11-
12-
modelFile := backendConfig.Model
10+
func ModelTokenize(s string, modelFile string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
1311

1412
var inferenceModel grpc.Backend
1513
var err error

Diff for: core/config/backend_config.go

+20-11
Original file line numberDiff line numberDiff line change
@@ -433,19 +433,20 @@ func (c *BackendConfig) HasTemplate() bool {
433433
type BackendConfigUsecases int
434434

435435
const (
436-
FLAG_ANY BackendConfigUsecases = 0b000000000
437-
FLAG_CHAT BackendConfigUsecases = 0b000000001
438-
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
439-
FLAG_EDIT BackendConfigUsecases = 0b000000100
440-
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
441-
FLAG_RERANK BackendConfigUsecases = 0b000010000
442-
FLAG_IMAGE BackendConfigUsecases = 0b000100000
443-
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
444-
FLAG_TTS BackendConfigUsecases = 0b010000000
445-
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
436+
FLAG_ANY BackendConfigUsecases = 0b0000000000
437+
FLAG_CHAT BackendConfigUsecases = 0b0000000001
438+
FLAG_COMPLETION BackendConfigUsecases = 0b0000000010
439+
FLAG_EDIT BackendConfigUsecases = 0b0000000100
440+
FLAG_EMBEDDINGS BackendConfigUsecases = 0b0000001000
441+
FLAG_RERANK BackendConfigUsecases = 0b0000010000
442+
FLAG_IMAGE BackendConfigUsecases = 0b0000100000
443+
FLAG_TRANSCRIPT BackendConfigUsecases = 0b0001000000
444+
FLAG_TTS BackendConfigUsecases = 0b0010000000
445+
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b0100000000
446+
FLAG_TOKENIZE BackendConfigUsecases = 0b1000000000
446447

447448
// Common Subsets
448-
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
449+
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
449450
)
450451

451452
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
@@ -460,6 +461,7 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
460461
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
461462
"FLAG_TTS": FLAG_TTS,
462463
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
464+
"FLAG_TOKENIZE": FLAG_TOKENIZE,
463465
"FLAG_LLM": FLAG_LLM,
464466
}
465467
}
@@ -545,5 +547,12 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
545547
}
546548
}
547549

550+
if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
551+
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
552+
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
553+
return false
554+
}
555+
}
556+
548557
return true
549558
}

Diff for: core/config/backend_config_loader.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,9 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
117117
// Load a config file if present after the model name
118118
cfg := &BackendConfig{
119119
PredictionOptions: schema.PredictionOptions{
120-
Model: modelName,
120+
BasicModelRequest: schema.BasicModelRequest{
121+
Model: modelName,
122+
},
121123
},
122124
}
123125

@@ -145,6 +147,14 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
145147
return cfg, nil
146148
}
147149

150+
func (bcl *BackendConfigLoader) LoadBackendConfigFileByNameDefaultOptions(modelName string, appConfig *ApplicationConfig) (*BackendConfig, error) {
151+
return bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
152+
LoadOptionDebug(appConfig.Debug),
153+
LoadOptionThreads(appConfig.Threads),
154+
LoadOptionContextSize(appConfig.ContextSize),
155+
LoadOptionF16(appConfig.F16))
156+
}
157+
148158
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
149159
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
150160
bcl.Lock()

Diff for: core/config/guesser.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ const (
2626
type settingsConfig struct {
2727
StopWords []string
2828
TemplateConfig TemplateConfig
29-
RepeatPenalty float64
29+
RepeatPenalty float64
3030
}
3131

3232
// default settings to adopt with a given model family
3333
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
3434
Gemma: {
3535
RepeatPenalty: 1.0,
36-
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
36+
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
3737
TemplateConfig: TemplateConfig{
3838
Chat: "{{.Input }}\n<start_of_turn>model\n",
3939
ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
@@ -161,10 +161,11 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
161161
}
162162

163163
// We try to guess only if we don't have a template defined already
164-
f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
164+
guessPath := filepath.Join(modelPath, cfg.ModelFileName())
165+
f, err := gguf.ParseGGUFFile(guessPath)
165166
if err != nil {
166167
// Only valid for gguf files
167-
log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
168+
log.Debug().Str("filePath", guessPath).Msg("guessDefaultsFromFile: not a GGUF file")
168169
return
169170
}
170171

Diff for: core/http/app.go

+7-5
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
121121
})
122122
}
123123

124-
// Health Checks should always be exempt from auth, so register these first
124+
// Health Checks should always be exempt from auth, so register these first
125125
routes.HealthRoutes(app)
126126

127127
kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
@@ -156,13 +156,15 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
156156
galleryService := services.NewGalleryService(appConfig)
157157
galleryService.Start(appConfig.Context, cl)
158158

159-
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
160-
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
161-
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
159+
requestExtractor := middleware.NewRequestExtractor(cl, ml, appConfig)
160+
161+
routes.RegisterElevenLabsRoutes(app, requestExtractor, cl, ml, appConfig)
162+
routes.RegisterLocalAIRoutes(app, requestExtractor, cl, ml, appConfig, galleryService)
163+
routes.RegisterOpenAIRoutes(app, requestExtractor, cl, ml, appConfig)
162164
if !appConfig.DisableWebUI {
163165
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
164166
}
165-
routes.RegisterJINARoutes(app, cl, ml, appConfig)
167+
routes.RegisterJINARoutes(app, requestExtractor, cl, ml, appConfig)
166168

167169
httpFS := http.FS(embedDirStatic)
168170

Diff for: core/http/ctx/fiber.go

-47
This file was deleted.

0 commit comments

Comments (0)