Skip to content

Commit 23e6d36

Browse files
committed
v0.1
1 parent c94bf1f commit 23e6d36

File tree

4 files changed

+597
-0
lines changed

4 files changed

+597
-0
lines changed

api/groupversion_info.go

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// Package v1alpha1 contains API Schema definitions for the inference.networking.x-k8s.io v1alpha1 API group
18+
// +kubebuilder:object:generate=true
19+
// +groupName=inference.networking.x-k8s.io
20+
package v1alpha1
21+
22+
import (
23+
"k8s.io/apimachinery/pkg/runtime/schema"
24+
"sigs.k8s.io/controller-runtime/pkg/scheme"
25+
)
26+
27+
var (
28+
// GroupVersion is the API group and version used to register these objects.
29+
GroupVersion = schema.GroupVersion{Group: "inference.networking.x-k8s.io", Version: "v1alpha1"}
30+
31+
// SchemeGroupVersion is an alias of GroupVersion for client-go libraries.
32+
// It is required by pkg/client/informers/externalversions/...
33+
SchemeGroupVersion = GroupVersion
34+
35+
// SchemeBuilder is used to add Go types to the GroupVersionKind scheme.
36+
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
37+
38+
// AddToScheme adds the types in this group-version to the given scheme.
39+
AddToScheme = SchemeBuilder.AddToScheme
40+
)
41+
42+
// Resource is required by pkg/client/listers/...
43+
func Resource(resource string) schema.GroupResource {
44+
return GroupVersion.WithResource(resource).GroupResource()
45+
}

api/inferencemodel_types.go

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
24+
25+
// InferenceModelSpec represents a specific model use case. This resource is
26+
// managed by the "Inference Workload Owner" persona.
27+
//
28+
// The Inference Workload Owner persona is: a team that trains, verifies, and
29+
// leverages a large language model from a model frontend, drives the lifecycle
30+
// and rollout of new versions of those models, and defines the specific
31+
// performance and latency goals for the model. These workloads are
32+
// expected to operate within an InferencePool sharing compute capacity with other
33+
// InferenceModels, defined by the Inference Platform Admin.
34+
//
35+
// InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool.
36+
// If the name is reused, an error will be shown on the status of the
37+
// InferenceModel that attempted to reuse it. The oldest InferenceModel, based on
38+
// creation timestamp, will be selected to remain valid. In the event of a race
39+
// condition, one will be selected at random.
40+
type InferenceModelSpec struct {
41+
// The name of the model as the users set in the "model" parameter in the requests.
42+
// The name should be unique among the workloads that reference the same backend pool.
43+
// This is the parameter that requests will be matched against. In the future, we may
44+
// allow matching on other request parameters. The other approach to support matching
45+
// on other request parameters is to use a different ModelName per HTTPFilter.
46+
// Names can be reserved without implementing an actual model in the pool.
47+
// This can be done by specifying a target model and setting its weight to zero;
48+
// an error will be returned specifying that no valid target model is found.
49+
//
50+
// +optional
51+
// +kubebuilder:validation:MaxLength=253
52+
ModelName string `json:"modelName,omitempty"`
53+
// Defines how important it is to serve the model compared to other models referencing the same pool.
54+
//
55+
// +optional
56+
// +kubebuilder:default="Default"
57+
Criticality *Criticality `json:"criticality,omitempty"`
58+
// Allow multiple versions of a model for traffic splitting.
59+
// If not specified, the target model name is defaulted to the modelName parameter.
60+
// modelName is often in reference to a LoRA adapter.
61+
//
62+
// +optional
63+
// +kubebuilder:validation:MaxItems=10
64+
TargetModels []TargetModel `json:"targetModels,omitempty"`
65+
// Reference to the inference pool. The pool must exist in the same namespace.
66+
//
67+
// +kubebuilder:validation:Required
68+
PoolRef PoolObjectReference `json:"poolRef"`
69+
}
70+
71+
// PoolObjectReference identifies an API object within the namespace of the
// referrer.
type PoolObjectReference struct {
	// Group is the group of the referent.
	//
	// +optional
	// +kubebuilder:default="inference.networking.x-k8s.io"
	// +kubebuilder:validation:MaxLength=253
	// +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
	Group string `json:"group,omitempty"`

	// Kind is kind of the referent. For example "InferencePool".
	//
	// +optional
	// +kubebuilder:default="InferencePool"
	// +kubebuilder:validation:MinLength=1
	// +kubebuilder:validation:MaxLength=63
	// +kubebuilder:validation:Pattern=`^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
	Kind string `json:"kind,omitempty"`

	// Name is the name of the referent.
	//
	// The field is required, so it is always serialized (no omitempty): an
	// unset name is sent as "" rather than being silently dropped by the
	// Go client.
	//
	// +kubebuilder:validation:MinLength=1
	// +kubebuilder:validation:MaxLength=253
	// +kubebuilder:validation:Required
	Name string `json:"name"`
}
98+
99+
// Criticality defines how important it is to serve the model compared to other models.
// +kubebuilder:validation:Enum=Critical;Default;Sheddable
type Criticality string

const (
	// Critical is the most important band. Requests to this band will be shed last.
	Critical Criticality = "Critical"
	// Default is more important than Sheddable, less important than Critical.
	// Requests in this band will be shed before critical traffic.
	Default Criticality = "Default"
	// Sheddable is the least important band. Requests to this band will be shed
	// before all other bands.
	Sheddable Criticality = "Sheddable"
)
113+
114+
// TargetModel represents a deployed model or a LoRA adapter. The
114+
// Name field is expected to match the name of the LoRA adapter
115+
// (or base model) as it is registered within the model server. Inference
116+
// Gateway assumes that the model exists on the model server, and it is the
117+
// responsibility of the user to validate a correct match. Should a model fail
118+
// to exist at request time, the error is processed by the Instance Gateway,
119+
// and then emitted on the appropriate InferenceModel object.
120+
type TargetModel struct {
121+
// The name of the adapter as expected by the ModelServer.
122+
//
123+
// +optional
124+
// +kubebuilder:validation:MaxLength=253
125+
Name string `json:"name,omitempty"`
126+
// Weight is used to determine the proportion of traffic that should be
127+
// sent to this target model when multiple versions of the model are specified.
128+
//
129+
// +optional
130+
// +kubebuilder:default=1
131+
// +kubebuilder:validation:Minimum=0
132+
// +kubebuilder:validation:Maximum=1000000
133+
Weight int32 `json:"weight,omitempty"` // NOTE(review): omitempty drops an explicit 0 on marshal, so the default=1 marker presumably applies instead — confirm a zero weight is expressible from Go clients.
134+
}
136+
137+
// InferenceModelStatus defines the observed state of InferenceModel.
138+
type InferenceModelStatus struct {
139+
// Conditions track the state of the InferenceModel.
140+
Conditions []metav1.Condition `json:"conditions,omitempty"`
141+
}
142+
143+
// +kubebuilder:object:root=true
144+
// +kubebuilder:subresource:status
145+
// +genclient
146+
147+
// InferenceModel is the Schema for the InferenceModels API.
148+
type InferenceModel struct {
149+
metav1.TypeMeta `json:",inline"`
150+
metav1.ObjectMeta `json:"metadata,omitempty"`
151+
152+
// Spec describes the model use case this object represents.
Spec InferenceModelSpec `json:"spec,omitempty"`
153+
// Status is the observed state of the InferenceModel.
Status InferenceModelStatus `json:"status,omitempty"`
154+
}
155+
156+
// +kubebuilder:object:root=true
157+
158+
// InferenceModelList contains a list of InferenceModel.
159+
type InferenceModelList struct {
160+
metav1.TypeMeta `json:",inline"`
161+
metav1.ListMeta `json:"metadata,omitempty"`
162+
// Items holds the InferenceModel objects in the list.
Items []InferenceModel `json:"items"`
163+
}
164+
165+
func init() {
166+
SchemeBuilder.Register(&InferenceModel{}, &InferenceModelList{})
167+
}

api/inferencepool_types.go

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
24+
25+
// InferencePoolSpec defines the desired state of InferencePool
26+
type InferencePoolSpec struct {
27+
28+
// Selector uses a map of label to watch model server pods
29+
// that should be included in the InferencePool. ModelServers should not
30+
// be with any other Service or InferencePool, that behavior is not supported
31+
// and will result in sub-optimal utilization.
32+
// In some cases, implementations may translate this to a Service selector, so this matches the simple
33+
// map used for Service selectors instead of the full Kubernetes LabelSelector type.
34+
//
35+
// +kubebuilder:validation:Required
36+
Selector map[LabelKey]LabelValue `json:"selector,omitempty"`
37+
38+
// TargetPortNumber is the port number that the model servers within the pool expect
39+
// to recieve traffic from.
40+
// This maps to the TargetPort in: https://pkg.go.dev/k8s.io/api/core/v1#ServicePort
41+
//
42+
// +kubebuilder:validation:Minimum=0
43+
// +kubebuilder:validation:Maximum=65535
44+
// +kubebuilder:validation:Required
45+
TargetPortNumber int32 `json:"targetPortNumber,omitempty"`
46+
}
47+
48+
// Originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731
// Duplicated as to not take an unexpected dependency on gw's API.
//
// LabelKey is the key of a label. This is used for validation
// of maps. This matches the Kubernetes "qualified name" validation that is used for labels.
//
// Valid values include:
//
// * example
// * example.com
// * example.com/path
// * example.com/path.html
//
// Invalid values include:
//
// * example~ - "~" is an invalid character
// * example.com. - can not start or end with "."
//
// The pattern is written in a raw (backtick) string, so the dot separating
// prefix segments is escaped with a single backslash.
//
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=253
// +kubebuilder:validation:Pattern=`^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9]$`
type LabelKey string
70+
71+
// LabelValue is the value of a label. This is used for validation
71+
// of maps. This matches the Kubernetes label validation rules:
72+
// * must be 63 characters or less (can be empty),
73+
// * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]),
74+
// * may contain dashes (-), underscores (_), dots (.), and alphanumerics in between.
75+
//
76+
// Valid values include:
77+
//
78+
// * MyValue
79+
// * my.name
80+
// * 123-my-value
81+
//
82+
// +kubebuilder:validation:MinLength=0
83+
// +kubebuilder:validation:MaxLength=63
84+
// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`
85+
type LabelValue string
87+
88+
// InferencePoolStatus defines the observed state of InferencePool.
89+
type InferencePoolStatus struct {
90+
91+
// Conditions track the state of the InferencePool.
92+
Conditions []metav1.Condition `json:"conditions,omitempty"`
93+
}
94+
95+
// +kubebuilder:object:root=true
96+
// +kubebuilder:subresource:status
97+
// +genclient
98+
99+
// InferencePool is the Schema for the InferencePools API.
100+
type InferencePool struct {
101+
metav1.TypeMeta `json:",inline"`
102+
metav1.ObjectMeta `json:"metadata,omitempty"`
103+
104+
// Spec is the desired state of the InferencePool.
Spec InferencePoolSpec `json:"spec,omitempty"`
105+
// Status is the observed state of the InferencePool.
Status InferencePoolStatus `json:"status,omitempty"`
106+
}
107+
108+
// +kubebuilder:object:root=true
109+
110+
// InferencePoolList contains a list of InferencePool.
111+
type InferencePoolList struct {
112+
metav1.TypeMeta `json:",inline"`
113+
metav1.ListMeta `json:"metadata,omitempty"`
114+
// Items holds the InferencePool objects in the list.
Items []InferencePool `json:"items"`
115+
}
116+
117+
func init() {
118+
SchemeBuilder.Register(&InferencePool{}, &InferencePoolList{})
119+
}

0 commit comments

Comments
 (0)