From 650d2f2de88204dc8cd8ebb74c913762f4b280e3 Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Wed, 14 Feb 2024 09:42:00 -0500 Subject: [PATCH] Prepare release notes and files for v0.6 (#1733) Change-Id: I325b633421e009cd3f7ae5b48ddcf0d3da46741d --- CHANGELOG/CHANGELOG-0.6.md | 82 ++++++++++++++++++++ Makefile | 4 +- README.md | 2 +- charts/kueue/Chart.yaml | 2 +- config/components/manager/kustomization.yaml | 2 +- site/config.toml | 2 +- 6 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 CHANGELOG/CHANGELOG-0.6.md diff --git a/CHANGELOG/CHANGELOG-0.6.md b/CHANGELOG/CHANGELOG-0.6.md new file mode 100644 index 0000000000..b39a971b1a --- /dev/null +++ b/CHANGELOG/CHANGELOG-0.6.md @@ -0,0 +1,82 @@ +## v0.6.0 + +Changes since `v0.5.0`: + +### API Change + +- A `stopPolicy` field in the ClusterQueue allows holding or draining a ClusterQueue (#1299, @trasc) +- Add a lendingLimit field in ClusterQueue's quotas, to allow restricting how much of the unused resources by the ClusterQueue can be borrowed by other ClusterQueues in the cohort. + In other words, this allows a quota equal to `nominal-lendingLimit` to be exclusively used by the ClusterQueue. (#1385, @B1F030) +- Add validation for clusterQueue: when cohort is empty, borrowingLimit must be nil. (#1525, @B1F030) +- Allow decreasing reclaimable pods to 0 for a suspended Job (#1277, @yaroslava-serdiuk) +- MultiKueue: Add Path location type for cluster KubeConfigs. (#1640, @trasc) +- MultiKueue: Add garbage collection of deleted Workloads. (#1643, @trasc) +- MultiKueue: Multi cluster job dispatching for k8s Job. This doesn't include support for live status updates. (#1313, @trasc) +- Support for a mechanism to suspend a running Job without requeueing (#1252, @vicentefb) +- Support for preemption while borrowing (#1397, @mimowo) +- The leaderElection field in the Configuration API is now defaulted. + Leader election is now enabled by default. 
(#1598, @astefanutti) +- Visibility API: Add an endpoint that allows a user to fetch information about pending workloads and their position in LocalQueue. (#1365, @PBundyra) +- Visibility API: Introduce an on-demand API endpoint for fetching pending workloads in a ClusterQueue. (#1251, @PBundyra) +- Visibility API: extend the information returned for the pending workloads in a ClusterQueue, including the workload position in the queue. (#1362, @PBundyra) +- WaitForPodsReady: Add a config field to allow admins to configure the timestamp used when sorting workloads that were evicted due to their Pods not becoming ready on time. (#1542, @nstogner) +- WaitForPodsReady: Support a backoff re-queueing mechanism with configurable limit. (#1709, @tenzen-y) + +### Feature + +- Add Prebuilt Workload support for JobSets. (#1575, @trasc) +- Add events for transitions of the provisioning AdmissionCheck (#1271, @stuton) +- Add prebuilt workload support for batch/job. (#1358, @trasc) +- Add support for groups of plain Pods. (#1319, @achernevskii) +- Allow configuring featureGates on helm charts. (#1314, @B1F030) +- At log level 6, the usage of ClusterQueues and cohorts is included in logs. + + The status of the internal cache and queues is also logged on demand when a SIGUSR2 is sent to kueue, regardless of the log level. (#1528, @alculquicondor) +- Changing tolerations in an inadmissible job triggers an admission retry with the updated tolerations. (#1304, @stuton) +- Increase the default number of reconcilers for Pod and Workload objects to 5, each. (#1589, @alculquicondor) +- Jobs preserve their position in the queue if the number of pods change before being admitted (#1223, @yaroslava-serdiuk) +- Make the image build setting CGO_ENABLED configurable (#1391, @anishasthana) +- MultiKueue: Add live status updates for multikueue JobSets (#1668, @trasc) +- MultiKueue: Support for JobSets. 
(#1606, @trasc) +- Support RayCluster as a queue-able workload in Kueue (#1520, @vicentefb) +- Support for retry of provisioning request. + + When `ProvisioningACC` is enabled, and there are existing ProvisioningRequests, they are going to be recreated. + This may cause job evictions for some long-running jobs which were using the ProvisioningRequests. (#1351, @mimowo) +- The image gcr.io/k8s-staging-kueue/debug:main, along with the script ./hack/dump_cache.sh can be used to trigger a dump of the internal cache into the logs. (#1541, @alculquicondor) +- The priority sorting within the cohort can be disabled by setting the feature gate PrioritySortingWithinCohort to false (#1406, @yaroslava-serdiuk) +- Visibility API: Add HA support. (#1554, @astefanutti) + +### Bug or Regression + +- Add Missing RBAC on finalizer sub-resources for job integrations. (#1486, @astefanutti) +- Add Mutating WebhookConfigurations for the AdmissionCheck, RayJob, and JobSet to helm charts (#1567, @B1F030) +- Add Validating/Mutating WebhookConfigurations for the KubeflowJobs like PyTorchJob (#1460, @tenzen-y) +- Added event for QuotaReserved and fixed event for Admitted to trigger when admission checks complete (#1436, @trasc) +- Prevent finished Workloads from blocking quota after a Kueue restart (#1689, @trasc) +- Avoid recreating a Workload for a finished Job and finalize a job when the workload is declared finished. (#1383, @achernevskii) +- Do not (re)create ProvReq if the state of admission check is Ready (#1617, @mimowo) +- Fix Kueue crashing at the log level 6 when re-admitting workloads (#1644, @mimowo) +- Fix a bug in the pod integration where unexpected errors occurred when the pod wasn't found (#1512, @achernevskii) +- Fix a bug where plain pods managed by Kueue would remain in a terminating state due to a finalizer (#1342, @tenzen-y) +- Fix a bug in the client-go libraries that prevented operating on cluster-scoped resources like ClusterQueue and ResourceFlavor. 
(#1294, @tenzen-y) +- Fix fungibility policy `Preempt` where it was not able to utilize the next flavor if preemption was not possible. (#1366, @alculquicondor) +- Fix handling of preemption within a cohort when there is no borrowingLimit. In that case, + during preemption, the permitted resources to borrow were calculated as if borrowingLimit=0, instead of unlimited. + + As a consequence, when using `reclaimWithinCohort`, it was possible that a workload, scheduled to ClusterQueue with no borrowingLimit, would preempt more workloads than needed, even though it could fit by borrowing. (#1561, @mimowo) +- Fix the synchronization of the admission check state based on recreated ProvisioningRequest (#1585, @mimowo) +- Fixed fungibility policy `whenCanPreempt: Preempt`. The admission should happen in the flavor for which preemptions were issued. (#1332, @alculquicondor) +- Kueue replicas are advertised as Ready only once the webhooks are functional. + + This allows users to wait with the first requests until the Kueue deployment is available, so that the + early requests don't fail. (#1676, @mimowo) +- Pending workload from StrictFIFO ClusterQueue doesn't block borrowing from other ClusterQueues (#1399, @yaroslava-serdiuk) +- Remove deleted pending workloads from the cache (#1679, @astefanutti) +- Remove finalizer from Workloads that are orphaned (have no owners). (#1523, @achernevskii) +- Trigger an eviction for an admitted Job after an admission check changed state to Rejected. 
(#1562, @trasc) +- Webhooks are served in non-leading replicas (#1509, @astefanutti) + +### Other (Cleanup or Flake) + +- Expose utility functions to set up jobframework reconcilers and webhooks (#1630, @tenzen-y) \ No newline at end of file diff --git a/Makefile b/Makefile index 425a1945ef..cb919826f9 100644 --- a/Makefile +++ b/Makefile @@ -83,8 +83,8 @@ LD_FLAGS += -X '$(version_pkg).GitCommit=$(shell git rev-parse HEAD)' # Update these variables when preparing a new release or a release branch. # Then run `make prepare-release-branch` -RELEASE_VERSION=v0.5.3 -RELEASE_BRANCH=main +RELEASE_VERSION=v0.6.0 +RELEASE_BRANCH=release-0.6 .PHONY: all all: generate fmt vet build diff --git a/README.md b/README.md index bd92508549..5ce449faf5 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Read the [overview](https://kueue.sigs.k8s.io/docs/overview/) to learn more. To install the latest release of Kueue in your cluster, run the following command: ```shell -kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.5.3/manifests.yaml +kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.6.0/manifests.yaml ``` The controller runs in the `kueue-system` namespace. diff --git a/charts/kueue/Chart.yaml b/charts/kueue/Chart.yaml index 69476c0564..3c9ed1ba6d 100644 --- a/charts/kueue/Chart.yaml +++ b/charts/kueue/Chart.yaml @@ -18,4 +18,4 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "v0.5.3" +appVersion: "v0.6.0" diff --git a/config/components/manager/kustomization.yaml b/config/components/manager/kustomization.yaml index b7b7487da3..8848b738b3 100644 --- a/config/components/manager/kustomization.yaml +++ b/config/components/manager/kustomization.yaml @@ -17,4 +17,4 @@ kind: Kustomization images: - name: controller newName: gcr.io/k8s-staging-kueue/kueue - newTag: main + newTag: release-0.6 diff --git a/site/config.toml b/site/config.toml index 882e741af2..a90adbddd7 100644 --- a/site/config.toml +++ b/site/config.toml @@ -92,7 +92,7 @@ ignoreFiles = [] # The major.minor version tag for the version of the docs represented in this # branch of the repository. Used in the "version-banner" partial to display a # version number for this doc set. - version = "v0.5.3" + version = "v0.6.0" # Flag used in the "version-banner" partial to decide whether to display a # banner on every page indicating that this is an archived version of the docs.