Hi,
I tried to upgrade my cluster, and somehow Rancher decided to delete two machines at once (vSphere provider), causing the upgrade to get stuck.
I tried to restore the etcd backup (it has worked before in similar situations), and that made Rancher “realize” that the nodes it deleted are gone - so that's good.
However, it is now stuck with this:
“waiting for 2 etcd machines to delete”
How can I proceed after this? I have really struggled to find information online about this particular issue. I can’t find any finalizers that seem to block this either. The current Cluster resource is below:
# Rancher provisioning Cluster "dev" (namespace fleet-default).
# NOTE(review): nesting was reconstructed from a flattened paste - confirm
# against the live object before applying anything from this copy.
apiVersion: provisioning.cattle.io/v1
kind: Cluster
metadata:
  annotations:
    field.cattle.io/creatorId: user-jz9v5
  creationTimestamp: '2023-08-17T07:35:08Z'
  # Finalizers currently set on the Cluster object itself.
  finalizers:
    - wrangler.cattle.io/provisioning-cluster-remove
    - wrangler.cattle.io/rke-cluster-remove
    - wrangler.cattle.io/cloud-config-secret-remover
  generation: 18
  # Three entries: the secret-migrator's finalizer, rancher's spec/finalizer
  # update, and rancher's status-subresource update.
  managedFields:
    - apiVersion: provisioning.cattle.io/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:finalizers:
            v:"wrangler.cattle.io/cloud-config-secret-remover": {}
      manager: rancher-v2.7.5-secret-migrator
      operation: Update
      time: '2023-08-17T07:35:09Z'
    - apiVersion: provisioning.cattle.io/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:finalizers:
            .: {}
            v:"wrangler.cattle.io/provisioning-cluster-remove": {}
            v:"wrangler.cattle.io/rke-cluster-remove": {}
        f:spec:
          .: {}
          f:cloudCredentialSecretName: {}
          f:defaultPodSecurityAdmissionConfigurationTemplateName: {}
          f:defaultPodSecurityPolicyTemplateName: {}
          f:kubernetesVersion: {}
          f:localClusterAuthEndpoint:
            .: {}
            f:caCerts: {}
            f:enabled: {}
            f:fqdn: {}
          f:rkeConfig:
            .: {}
            f:chartValues:
              .: {}
              f:rke2-cilium: {}
            f:etcd:
              .: {}
              f:disableSnapshots: {}
              f:snapshotRetention: {}
              f:snapshotScheduleCron: {}
            f:etcdSnapshotRestore:
              .: {}
              f:generation: {}
              f:name: {}
              f:restoreRKEConfig: {}
            f:machineGlobalConfig:
              .: {}
              f:cni: {}
              f:disable-kube-proxy: {}
              f:etcd-expose-metrics: {}
            f:machinePoolDefaults: {}
            f:machinePools: {}
            f:machineSelectorConfig: {}
            f:registries:
              .: {}
              f:configs: {}
              f:mirrors: {}
            f:upgradeStrategy:
              .: {}
              f:controlPlaneConcurrency: {}
              f:controlPlaneDrainOptions:
                .: {}
                f:deleteEmptyDirData: {}
                f:disableEviction: {}
                f:enabled: {}
                f:force: {}
                f:gracePeriod: {}
                f:ignoreDaemonSets: {}
                f:skipWaitForDeleteTimeoutSeconds: {}
                f:timeout: {}
              f:workerConcurrency: {}
              f:workerDrainOptions:
                .: {}
                f:deleteEmptyDirData: {}
                f:disableEviction: {}
                f:enabled: {}
                f:force: {}
                f:gracePeriod: {}
                f:ignoreDaemonSets: {}
                f:skipWaitForDeleteTimeoutSeconds: {}
                f:timeout: {}
      manager: rancher
      operation: Update
      time: '2024-02-14T11:21:46Z'
    - apiVersion: provisioning.cattle.io/v1
      fieldsType: FieldsV1
      fieldsV1:
        f:status:
          .: {}
          f:agentDeployed: {}
          f:clientSecretName: {}
          f:clusterName: {}
          f:conditions: {}
          f:observedGeneration: {}
          f:ready: {}
      manager: rancher
      operation: Update
      subresource: status
      time: '2024-02-15T01:21:39Z'
  name: dev
  namespace: fleet-default
  resourceVersion: '158698501'
  uid: e9e555af-cfd4-4412-9fda-9dc75c5b650f
# Desired state of the cluster: RKE2 v1.26.11 with a single machine pool.
# NOTE(review): nesting was reconstructed from a flattened paste - confirm
# against the live object before applying anything from this copy.
spec:
  cloudCredentialSecretName: cattle-global-data:cc-nt5vn
  defaultPodSecurityAdmissionConfigurationTemplateName: ''
  defaultPodSecurityPolicyTemplateName: ''
  kubernetesVersion: v1.26.11+rke2r1
  localClusterAuthEndpoint:
    caCerts: ''
    enabled: false
    fqdn: ''
  rkeConfig:
    chartValues:
      rke2-cilium: {}
    etcd:
      disableSnapshots: false
      snapshotRetention: 25
      snapshotScheduleCron: 0 */5 * * *
    # Snapshot restore request (generation 2) naming the local snapshot used.
    etcdSnapshotRestore:
      generation: 2
      name: dev-etcd-snapshot-dev-pool1-096006f5-vrll6-1707832805-local
      restoreRKEConfig: none
    machineGlobalConfig:
      cni: cilium
      disable-kube-proxy: false
      etcd-expose-metrics: false
    machinePoolDefaults: {}
    # Only one pool is defined; it has quantity 4 and sets the controlPlane,
    # etcd and worker roles all to true.
    machinePools:
      - controlPlaneRole: true
        dynamicSchemaSpec: >-
          {"resourceFields":{"boot2dockerUrl":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          URL for boot2docker
          image"},"cfgparam":{"type":"array[string]","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"nullable":true,"create":true,"update":true,"description":"vSphere
          vm configuration parameters (used for
          guestinfo)"},"cloneFrom":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"If
          you choose creation type clone a name of what you want to clone is
          required"},"cloudConfig":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"Filepath
          to a cloud-config yaml file to put into the ISO
          user-data"},"cloudinit":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          cloud-init filepath or url to add to guestinfo, filepath will be read
          and base64 encoded before
          adding"},"contentLibrary":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"If
          you choose to clone from a content library template specify the name
          of the
          library"},"cpuCount":{"type":"string","default":{"stringValue":"2","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          CPU number for docker
          VM"},"creationType":{"type":"string","default":{"stringValue":"legacy","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"Creation
          type when creating a new virtual machine. Supported values: vm,
          template, library,
          legacy"},"customAttribute":{"type":"array[string]","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"nullable":true,"create":true,"update":true,"description":"vSphere
          custom attribute, format key/value e.g. '200=my custom
          value'"},"datacenter":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          datacenter for virtual
          machine"},"datastore":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          datastore for virtual
          machine"},"datastoreCluster":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          datastore cluster for virtual
          machine"},"diskSize":{"type":"string","default":{"stringValue":"20480","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          size of disk for docker VM (in
          MB)"},"folder":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          folder for the docker VM. This folder must already exist in the
          datacenter"},"hostsystem":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          compute resource where the docker VM will be instantiated. This can be
          omitted if using a cluster with
          DRS"},"memorySize":{"type":"string","default":{"stringValue":"2048","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          size of memory for docker VM (in
          MB)"},"network":{"type":"array[string]","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"nullable":true,"create":true,"update":true,"description":"vSphere
          network where the virtual machine will be
          attached"},"os":{"type":"string","default":{"stringValue":"linux","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"If
          using a non-B2D image you can specify the desired machine
          OS"},"password":{"type":"password","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          password"},"pool":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          resource pool for docker
          VM"},"sshPassword":{"type":"string","default":{"stringValue":"tcuser","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"If
          using a non-B2D image you can specify the ssh
          password"},"sshPort":{"type":"string","default":{"stringValue":"22","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"If
          using a non-B2D image you can specify the ssh
          port"},"sshUser":{"type":"string","default":{"stringValue":"docker","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"If
          using a non-B2D image you can specify the ssh
          user"},"sshUserGroup":{"type":"string","default":{"stringValue":"staff","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"If
          using a non-B2D image the uploaded keys will need chown'ed, defaults
          to staff e.g.
          docker:staff"},"tag":{"type":"array[string]","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"nullable":true,"create":true,"update":true,"description":"vSphere
          tag id e.g.
          urn:xxx"},"username":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          username"},"vappIpallocationpolicy":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          vApp IP allocation policy. Supported values are: dhcp, fixed,
          transient and
          fixedAllocated"},"vappIpprotocol":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          vApp IP protocol for this deployment. Supported values are: IPv4 and
          IPv6"},"vappProperty":{"type":"array[string]","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"nullable":true,"create":true,"update":true,"description":"vSphere
          vApp
          properties"},"vappTransport":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          OVF environment transports to use for properties. Supported values
          are: iso and
          com.vmware.guestInfo"},"vcenter":{"type":"string","default":{"stringValue":"","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          IP/hostname for
          vCenter"},"vcenterPort":{"type":"string","default":{"stringValue":"443","intValue":0,"boolValue":false,"stringSliceValue":null},"create":true,"update":true,"description":"vSphere
          Port for vCenter"}}}
        etcdRole: true
        machineConfigRef:
          kind: VmwarevsphereConfig
          name: nc-dev-pool1-8lmwh
        machineOS: linux
        name: pool1
        quantity: 4
        unhealthyNodeTimeout: 0s
        workerRole: true
    machineSelectorConfig:
      - config:
          protect-kernel-defaults: false
    registries:
      configs: {}
      mirrors: {}
    # Control-plane and worker settings are identical: concurrency '1' and
    # the same drain options.
    upgradeStrategy:
      controlPlaneConcurrency: '1'
      controlPlaneDrainOptions:
        deleteEmptyDirData: true
        disableEviction: false
        enabled: true
        force: true
        gracePeriod: -1
        ignoreDaemonSets: true
        skipWaitForDeleteTimeoutSeconds: 0
        timeout: 120
      workerConcurrency: '1'
      workerDrainOptions:
        deleteEmptyDirData: true
        disableEviction: false
        enabled: true
        force: true
        gracePeriod: -1
        ignoreDaemonSets: true
        skipWaitForDeleteTimeoutSeconds: 0
        timeout: 120
# Observed state reported for the cluster.
# NOTE(review): nesting was reconstructed from a flattened paste - confirm
# against the live object before applying anything from this copy.
status:
  agentDeployed: true
  clientSecretName: dev-kubeconfig
  clusterName: c-m-vrtks6ms
  conditions:
    - lastUpdateTime: '2023-08-17T12:40:00Z'
      status: 'False'
      type: Reconciling
    - lastUpdateTime: '2023-08-17T07:35:09Z'
      status: 'False'
      type: Stalled
    - lastUpdateTime: '2024-02-14T11:21:46Z'
      status: 'True'
      type: Created
    - lastUpdateTime: '2024-02-15T01:21:39Z'
      status: 'True'
      type: RKECluster
    - lastUpdateTime: '2023-08-17T07:35:09Z'
      status: 'True'
      type: BackingNamespaceCreated
    - lastUpdateTime: '2023-08-17T07:35:09Z'
      status: 'True'
      type: DefaultProjectCreated
    - lastUpdateTime: '2023-08-17T07:35:09Z'
      status: 'True'
      type: SystemProjectCreated
    - lastUpdateTime: '2023-08-17T07:35:09Z'
      status: 'True'
      type: InitialRolesPopulated
    - lastUpdateTime: '2023-08-17T07:35:09Z'
      status: 'True'
      type: CreatorMadeOwner
    # Updated and Provisioned both carry the same "waiting for 2 etcd
    # machines to delete" message with status Unknown.
    - lastUpdateTime: '2024-02-14T11:14:18Z'
      message: waiting for 2 etcd machines to delete
      reason: Waiting
      status: Unknown
      type: Updated
    - lastUpdateTime: '2024-02-14T11:14:18Z'
      message: waiting for 2 etcd machines to delete
      reason: Waiting
      status: Unknown
      type: Provisioned
    # The Ready condition is 'False' (agent disconnected) even though the
    # top-level `ready` field below is true.
    - lastUpdateTime: '2024-02-14T10:28:10Z'
      message: Cluster agent is not connected
      reason: Disconnected
      status: 'False'
      type: Ready
    - lastUpdateTime: '2023-08-17T07:35:10Z'
      status: 'True'
      type: NoDiskPressure
    - lastUpdateTime: '2023-08-17T07:35:10Z'
      status: 'True'
      type: NoMemoryPressure
    - lastUpdateTime: '2023-08-17T07:35:11Z'
      status: 'True'
      type: SecretsMigrated
    - lastUpdateTime: '2023-08-17T07:35:11Z'
      status: 'True'
      type: ServiceAccountSecretsMigrated
    - lastUpdateTime: '2023-08-17T07:35:11Z'
      status: 'True'
      type: RKESecretsMigrated
    - lastUpdateTime: '2023-08-17T07:35:11Z'
      status: 'True'
      type: ACISecretsMigrated
    - lastUpdateTime: '2024-02-14T10:28:07Z'
      status: 'False'
      type: Connected
    - lastUpdateTime: '2023-08-17T12:39:45Z'
      status: 'True'
      type: GlobalAdminsSynced
    - lastUpdateTime: '2023-08-17T12:39:53Z'
      status: 'True'
      type: SystemAccountCreated
    - lastUpdateTime: '2023-08-17T12:39:53Z'
      status: 'True'
      type: AgentDeployed
    - lastUpdateTime: '2023-08-17T12:40:00Z'
      status: 'True'
      type: Waiting
  observedGeneration: 18
  ready: true