Skip to content

Commit

Permalink
Add APIs for configuring fair sharing (#2070)
Browse files Browse the repository at this point in the history
* Add APIs for configuring fair sharing

Change-Id: I74b24ba8715290753c0bd9b966a109b5b01078b4

* Validation and defaulting for fair sharing

Change-Id: If347eabdb17af643d46a1e4ad78b79e73c424011

* Implement multiple fair strategies

Change-Id: I6938d83399c55fecf952a570e2e431ad4ab479b2

* Implement fair sharing weight

Change-Id: I8b49d21f0b0e7a7d2607d9589ba49a4a914cd2aa

* Fix flaky integration

Change-Id: I68ba0c546ce27d9e6565a30584aacd67fd318ede

* review

Change-Id: I375166b8c7fdc300eeb43ede25fbcbe73298c7a2

* Update documentation for strategies

Change-Id: I4d56c73f4a949fa5c2b2053fd83a7161553755b0

* Change default configuration to match KEP

Change-Id: I6893b7854656601e80f157b60caea27e27dfe4ea

* Disable fair sharing by default

Change-Id: I6379c4304cfb45a08940d57a24b79c6b1564ccca

* Relax validation

Change-Id: I1ec9229cb866d31ece5046741ac7686157639758
  • Loading branch information
alculquicondor authored May 8, 2024
1 parent 6cceaa0 commit 54d1c01
Show file tree
Hide file tree
Showing 32 changed files with 976 additions and 41 deletions.
34 changes: 34 additions & 0 deletions apis/config/v1beta1/configuration_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ type Configuration struct {

// MultiKueue controls the behaviour of the MultiKueue AdmissionCheck Controller.
MultiKueue *MultiKueue `json:"multiKueue,omitempty"`

// FairSharing controls the fair sharing semantics across the cluster.
FairSharing *FairSharing `json:"fairSharing,omitempty"`
}

type ControllerManager struct {
Expand Down Expand Up @@ -352,3 +355,34 @@ type ClusterQueueVisibility struct {
// Defaults to 10.
MaxCount int32 `json:"maxCount,omitempty"`
}

// PreemptionStrategy names a constraint that a preemption must satisfy when
// fair sharing is in use.
type PreemptionStrategy string

const (
// LessThanOrEqualToFinalShare only allows preempting a workload if the share of the
// preemptor CQ with the preemptor workload is less than or equal to the share of the
// preemptee CQ without the workload to be preempted.
LessThanOrEqualToFinalShare PreemptionStrategy = "LessThanOrEqualToFinalShare"
// LessThanInitialShare only allows preempting a workload if the share of the preemptor CQ
// with the incoming workload is strictly less than the share of the preemptee CQ.
LessThanInitialShare PreemptionStrategy = "LessThanInitialShare"
)

// FairSharing holds the configuration of the fair sharing semantics across the cluster.
type FairSharing struct {
// enable indicates whether to enable fair sharing for all cohorts.
// Defaults to false.
Enable bool `json:"enable"`

// preemptionStrategies indicates which constraints a preemption should satisfy.
// The preemption algorithm will only use the next strategy in the list if the
// incoming workload (preemptor) doesn't fit after using the previous strategies.
// Possible values are:
// - LessThanOrEqualToFinalShare: Only preempt a workload if the share of the preemptor CQ
// with the preemptor workload is less than or equal to the share of the preemptee CQ
// without the workload to be preempted.
// This strategy might favor preemption of smaller workloads in the preemptee CQ,
// regardless of priority or start time, in an effort to keep the share of the CQ
// as high as possible.
// - LessThanInitialShare: Only preempt a workload if the share of the preemptor CQ
// with the incoming workload is strictly less than the share of the preemptee CQ.
// This strategy doesn't depend on the share usage of the workload being preempted.
// As a result, the strategy chooses to preempt workloads with the lowest priority and
// newest start time first.
// The default strategy is ["LessThanOrEqualToFinalShare", "LessThanInitialShare"],
// applied when fair sharing is enabled and no strategies are specified.
PreemptionStrategies []PreemptionStrategy `json:"preemptionStrategies,omitempty"`
}
3 changes: 3 additions & 0 deletions apis/config/v1beta1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,7 @@ func SetDefaults_Configuration(cfg *Configuration) {
if cfg.MultiKueue.WorkerLostTimeout == nil {
cfg.MultiKueue.WorkerLostTimeout = &metav1.Duration{Duration: DefaultMultiKueueWorkerLostTimeout}
}
if fs := cfg.FairSharing; fs != nil && fs.Enable && len(fs.PreemptionStrategies) == 0 {
fs.PreemptionStrategies = []PreemptionStrategy{LessThanOrEqualToFinalShare, LessThanInitialShare}
}
}
25 changes: 25 additions & 0 deletions apis/config/v1beta1/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,31 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
},
},
"add default fair sharing configuration when enabled": {
original: &Configuration{
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
FairSharing: &FairSharing{
Enable: true,
},
},
want: &Configuration{
Namespace: ptr.To(DefaultNamespace),
ControllerManager: defaultCtrlManagerConfigurationSpec,
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
FairSharing: &FairSharing{
Enable: true,
PreemptionStrategies: []PreemptionStrategy{LessThanOrEqualToFinalShare, LessThanInitialShare},
},
},
},
}

for name, tc := range testCases {
Expand Down
25 changes: 25 additions & 0 deletions apis/config/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions apis/kueue/v1beta1/clusterqueue_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ type ClusterQueueSpec struct {
// +kubebuilder:validation:Enum=None;Hold;HoldAndDrain
// +kubebuilder:default="None"
StopPolicy *StopPolicy `json:"stopPolicy,omitempty"`

// fairSharing defines the properties of the ClusterQueue when participating in fair sharing.
// The values are only relevant if fair sharing is enabled in the Kueue configuration.
FairSharing *FairSharing `json:"fairSharing,omitempty"`
}

// AdmissionCheckStrategy defines a strategy for an AdmissionCheck.
Expand Down Expand Up @@ -464,6 +468,20 @@ type BorrowWithinCohort struct {
MaxPriorityThreshold *int32 `json:"maxPriorityThreshold,omitempty"`
}

// FairSharing contains the properties of the ClusterQueue when participating in fair sharing.
type FairSharing struct {
// weight gives a comparative advantage to this ClusterQueue when competing for unused
// resources in the cohort against other ClusterQueues.
// The share of a ClusterQueue is based on the dominant resource usage above nominal
// quotas for each resource, divided by the weight.
// Admission prioritizes scheduling workloads from ClusterQueues with the lowest share
// and preempting workloads from the ClusterQueues with the highest share.
// A zero weight implies infinite share value, meaning that this ClusterQueue will always
// be at a disadvantage against other ClusterQueues.
// +kubebuilder:default=1
Weight *resource.Quantity `json:"weight,omitempty"`
}

// +genclient
// +genclient:nonNamespaced
// +kubebuilder:object:root=true
Expand Down
25 changes: 25 additions & 0 deletions apis/kueue/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions charts/kueue/templates/crd/kueue.x-k8s.io_clusterqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,28 @@ spec:
maxLength: 253
pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
type: string
fairSharing:
description: |-
fairSharing defines the properties of the ClusterQueue when participating in fair sharing.
The values are only relevant if fair sharing is enabled in the Kueue configuration.
properties:
weight:
anyOf:
- type: integer
- type: string
default: 1
description: |-
weight gives a comparative advantage to this ClusterQueue when competing for unused
resources in the cohort against other ClusterQueues.
The share of a ClusterQueue is based on the dominant resource usage above nominal
quotas for each resource, divided by the weight.
Admission prioritizes scheduling workloads from ClusterQueues with the lowest share
and preempting workloads from the ClusterQueues with the highest share.
A zero weight implies infinite share value, meaning that this ClusterQueue will always
be at a disadvantage against other ClusterQueues.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
flavorFungibility:
default: {}
description: |-
Expand Down
3 changes: 3 additions & 0 deletions charts/kueue/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ managerConfig:
# - key: kubernetes.io/metadata.name
# operator: NotIn
# values: [ kube-system, kueue-system ]
# fairSharing:
# enable: true
# preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
# ports definition for metricsService and webhookService.
metricsService:
ports:
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions client-go/applyconfiguration/kueue/v1beta1/fairsharing.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions client-go/applyconfiguration/utils.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions cmd/kueue/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ func setupScheduler(mgr ctrl.Manager, cCache *cache.Cache, queues *queue.Manager
mgr.GetClient(),
mgr.GetEventRecorderFor(constants.AdmissionName),
scheduler.WithPodsReadyRequeuingTimestamp(podsReadyRequeuingTimestamp(cfg)),
scheduler.WithFairSharing(cfg.FairSharing),
)
if err := mgr.Add(sched); err != nil {
setupLog.Error(err, "Unable to add scheduler to manager")
Expand Down
22 changes: 22 additions & 0 deletions config/components/crd/bases/kueue.x-k8s.io_clusterqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,28 @@ spec:
maxLength: 253
pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
type: string
fairSharing:
description: |-
fairSharing defines the properties of the ClusterQueue when participating in fair sharing.
The values are only relevant if fair sharing is enabled in the Kueue configuration.
properties:
weight:
anyOf:
- type: integer
- type: string
default: 1
description: |-
weight gives a comparative advantage to this ClusterQueue when competing for unused
resources in the cohort against other ClusterQueues.
The share of a ClusterQueue is based on the dominant resource usage above nominal
quotas for each resource, divided by the weight.
Admission prioritizes scheduling workloads from ClusterQueues with the lowest share
and preempting workloads from the ClusterQueues with the highest share.
A zero weight implies infinite share value, meaning that this ClusterQueue will always
be at a disadvantage against other ClusterQueues.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
flavorFungibility:
default: {}
description: |-
Expand Down
3 changes: 3 additions & 0 deletions config/components/manager/controller_manager_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,6 @@ integrations:
# - key: kubernetes.io/metadata.name
# operator: NotIn
# values: [ kube-system, kueue-system ]
# fairSharing:
# enable: true
# preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
Loading

0 comments on commit 54d1c01

Please sign in to comment.