In K8s, all Vault pods in the cluster are unable to start up

We have a 3-node k8s cluster running Vault in HA mode with Raft integrated storage. A separate single-node Vault, installed on a standalone VM, auto-unseals the Vault in k8s via the transit seal (a sketch of the assumed setup on the transit side is shown after the values below). The following is the Helm values file used to install HashiCorp Vault on k8s.

csi:
  enabled: true
  pod:
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""
    tolerations:
    - effect: NoSchedule
      key: node-role.kubernetes.io/control-plane
      operator: Exists
  volumeMounts:
  - mountPath: /vault/tls
    name: tls
    readOnly: true
  volumes:
  - name: tls
    secret:
      secretName: vault-cluster-tls
global:
  tlsDisable: false
injector:
  enabled: true
  logLevel: trace
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - effect: NoSchedule
    key: node-role.kubernetes.io/control-plane
    operator: Exists
server:
  auditStorage:
    enabled: true
  ha:
    enabled: true
    raft:
      config: |
        ui = true
        cluster_name = "vault-integrated-storage"

        listener "tcp" {
          tls_disable = 0
          address = "[::]:8200"
          cluster_address = "[::]:8201"
          tls_cert_file = "/vault/userconfig/vault-cluster-tls/tls.crt"
          tls_key_file = "/vault/userconfig/vault-cluster-tls/tls.key"
        }

        storage "raft" {
          path = "/vault/data"

          retry_join {
            leader_api_addr = "https://vault-0.vault-internal:8200"
            leader_ca_cert_file = "/vault/userconfig/vault-cluster-tls/ca.crt"
            leader_client_cert_file = "/vault/userconfig/vault-cluster-tls/tls.crt"
            leader_client_key_file = "/vault/userconfig/vault-cluster-tls/tls.key"
          }
          retry_join {
            leader_api_addr = "https://vault-1.vault-internal:8200"
            leader_ca_cert_file = "/vault/userconfig/vault-cluster-tls/ca.crt"
            leader_client_cert_file = "/vault/userconfig/vault-cluster-tls/tls.crt"
            leader_client_key_file = "/vault/userconfig/vault-cluster-tls/tls.key"
          }
          retry_join {
            leader_api_addr = "https://vault-2.vault-internal:8200"
            leader_ca_cert_file = "/vault/userconfig/vault-cluster-tls/ca.crt"
            leader_client_cert_file = "/vault/userconfig/vault-cluster-tls/tls.crt"
            leader_client_key_file = "/vault/userconfig/vault-cluster-tls/tls.key"
          }
        }

        seal "transit" {
          address = "<omit>"
          token   = "<omit>"
          disable_renewal = "false"
          key_name = "autounseal"
          mount_path = "transit/"
        }

        service_registration "kubernetes" {}
      enabled: true
      setNodeId: false
    replicas: 3
  ingress:
    annotations:
      nginx.ingress.kubernetes.io/backend-protocol: HTTPS
    enabled: true
    hosts:
    - host: vault.k8s.local
    tls:
    - hosts:
      - vault.k8s.local
      secretName: vault-ingress-tls
  logLevel: trace
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  standalone:
    enabled: false
  tolerations:
  - effect: NoSchedule
    key: node-role.kubernetes.io/control-plane
    operator: Exists
  volumeMounts:
  - mountPath: /etc/ssl/certs/serena-ca.crt
    name: cert-store
    subPath: serena-ca.crt
  - mountPath: /vault/userconfig/vault-cluster-tls/
    name: vault-cert
  volumes:
  - name: cert-store
    secret:
      items:
      - key: ca.crt
        path: serena-ca.crt
      secretName: vault-cluster-tls
  - name: vault-cert
    secret:
      secretName: vault-cluster-tls
serverTelemetry:
  serviceMonitor:
    enabled: true
    selectors:
      release: monitoring
ui:
  enabled: true
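
For reference, the standalone VM's side of the transit auto-unseal is not shown above; the following is a minimal sketch, using the Vault Go API client, of what it is assumed to look like. The address and token are placeholders, and only the mount path "transit/" and key name "autounseal" are taken from the seal stanza in the values.

package main

import (
	"log"

	vault "github.com/hashicorp/vault/api"
)

func main() {
	// Placeholder address and token for the standalone transit Vault (not from the post).
	cfg := vault.DefaultConfig()
	cfg.Address = "https://standalone-vault.example.com:8200"

	client, err := vault.NewClient(cfg)
	if err != nil {
		log.Fatalf("creating client: %v", err)
	}
	client.SetToken("<admin-token>")

	// Enable the transit secrets engine at "transit/", matching mount_path in the seal stanza.
	if err := client.Sys().Mount("transit", &vault.MountInput{Type: "transit"}); err != nil {
		log.Fatalf("enabling transit: %v", err)
	}

	// Create the "autounseal" key referenced by key_name in the seal stanza.
	if _, err := client.Logical().Write("transit/keys/autounseal",
		map[string]interface{}{"type": "aes256-gcm96"}); err != nil {
		log.Fatalf("creating autounseal key: %v", err)
	}

	// The token used in the seal stanza also needs a policy allowing
	// transit/encrypt/autounseal and transit/decrypt/autounseal.
	log.Println("transit auto-unseal backend is ready")
}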

We noticed that when we shut down all the k8s nodes and turn them back on the next day, there is a chance that the following logs appear on all of vault-0, vault-1, and vault-2. Once this happens, there seems to be no way to recover.

  1. Is it that Vault cannot handle all k8s nodes being shut down abnormally at the same time?
  2. Does the following error mean the Raft database is corrupted? (A sketch of how this could be checked with the bbolt library follows the log below.)

2024-08-20T02:28:35.356Z [INFO]  proxy environment: http_proxy="" https_proxy="" no_proxy=""
2024-08-20T02:28:35.356Z [WARN]  storage.raft.fsm: raft FSM db file has wider permissions than needed: needed=-rw------- existing=-rw-rw----
2024-08-20T02:28:35.357Z [DEBUG] storage.raft.fsm: time to open database: elapsed=1.461221ms path=/vault/data/vault.db
panic: assertion failed: Page expected to be: 7128, but self identifies as 0

goroutine 1 [running]:
go.etcd.io/bbolt._assert(...)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/db.go:1359
go.etcd.io/bbolt.(*page).fastCheck(0x7f8e5341f000, 0x1bd8)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/page.go:57 +0x1d9
go.etcd.io/bbolt.(*Tx).page(0x7f8e5340d000?, 0x88b2f20?)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/tx.go:534 +0x7b
go.etcd.io/bbolt.(*Tx).forEachPageInternal(0xc002c78620, {0xc00352d950, 0x3, 0xa}, 0xc00397e480)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/tx.go:546 +0x5d
go.etcd.io/bbolt.(*Tx).forEachPageInternal(0xc002c78620, {0xc00352d950, 0x2, 0xa}, 0xc00397e480)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/tx.go:555 +0xc8
go.etcd.io/bbolt.(*Tx).forEachPageInternal(0xc002c78620, {0xc00352d950, 0x1, 0xa}, 0xc00397e480)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/tx.go:555 +0xc8
go.etcd.io/bbolt.(*Tx).forEachPage(...)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/tx.go:542
go.etcd.io/bbolt.(*Tx).checkBucket(0xc002c78620, 0xc002a8e680, 0xc00397e888, 0xc00397e7c8, {0xcfe3680, 0x13338200}, 0xc0001943c0)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/tx_check.go:83 +0x114
go.etcd.io/bbolt.(*Tx).checkBucket.func2({0x7f8e5341c0d2?, 0xc00397e5b8?, 0xc0003d6b10?})
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/tx_check.go:110 +0x90
go.etcd.io/bbolt.(*Bucket).ForEachBucket(0x0?, 0xc00397e680)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/bucket.go:403 +0x96
go.etcd.io/bbolt.(*Tx).checkBucket(0xc002c78620, 0xc002c78638, 0xc00397e888, 0xc00397e7c8, {0xcfe3680, 0x13338200}, 0xc0001943c0)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/tx_check.go:108 +0x255
go.etcd.io/bbolt.(*DB).freepages(0xc0036c58c8)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/db.go:1181 +0x225
go.etcd.io/bbolt.(*DB).loadFreelist.func1()
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/db.go:412 +0xbb
sync.(*Once).doSlow(0xc0036c5a90?, 0x10?)
        /opt/hostedtoolcache/go/1.22.5/x64/src/sync/once.go:74 +0xc2
sync.(*Once).Do(...)
        /opt/hostedtoolcache/go/1.22.5/x64/src/sync/once.go:65
go.etcd.io/bbolt.(*DB).loadFreelist(0xc0036c58c8?)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/db.go:408 +0x45
go.etcd.io/bbolt.Open({0xc0036b1968, 0x18}, 0x180, 0xc00282ed80)
        /home/runner/go/pkg/mod/go.etcd.io/bbolt@v1.3.7/db.go:290 +0x3ee
github.com/hashicorp/raft-boltdb/v2.New({{0xc0036b1968, 0x18}, 0xc00282ed80, 0x0, 0x1})
        /home/runner/go/pkg/mod/github.com/hashicorp/raft-boltdb/v2@v2.3.0/bolt_store.go:79 +0x45
github.com/hashicorp/vault/physical/raft.NewRaftBackend(0xc00280b200, {0xd084e88, 0xc00280b320})
        /home/runner/work/vault/vault/physical/raft/raft.go:507 +0x613
github.com/hashicorp/vault/command.(*ServerCommand).setupStorage(0xc0036c5208, 0xc003718f08)
        /home/runner/work/vault/vault/command/server.go:811 +0x319
github.com/hashicorp/vault/command.(*ServerCommand).Run(0xc0036c5208, {0xc000196a10, 0x1, 0x1})
        /home/runner/work/vault/vault/command/server.go:1188 +0x10a6
github.com/hashicorp/cli.(*CLI).Run(0xc003365a40)
        /home/runner/go/pkg/mod/github.com/hashicorp/cli@v1.1.6/cli.go:265 +0x5b8
github.com/hashicorp/vault/command.RunCustom({0xc000196a00?, 0x2?, 0x2?}, 0xc0000061c0?)
        /home/runner/work/vault/vault/command/main.go:243 +0x9a6
github.com/hashicorp/vault/command.Run(...)
        /home/runner/work/vault/vault/command/main.go:147
main.main()
        /home/runner/work/vault/vault/main.go:13 +0x47
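
Regarding question 2: below is a minimal sketch, using the same bbolt library that panics in the trace, of how the two bolt files used by Vault's integrated storage could be checked for corruption offline. The paths are the standard locations under the configured data path; run it against copies of the files while the pod is stopped.

package main

import (
	"fmt"
	"time"

	bolt "go.etcd.io/bbolt"
)

// checkFile opens one bolt database read-only and reports integrity errors.
func checkFile(path string) {
	db, err := bolt.Open(path, 0600, &bolt.Options{ReadOnly: true, Timeout: 5 * time.Second})
	if err != nil {
		// A badly corrupted file can fail (or even panic) right here, as in the trace above.
		fmt.Printf("%s: open failed: %v\n", path, err)
		return
	}
	defer db.Close()

	err = db.View(func(tx *bolt.Tx) error {
		broken := false
		for cerr := range tx.Check() { // walks pages and buckets, reporting inconsistencies
			broken = true
			fmt.Printf("%s: %v\n", path, cerr)
		}
		if broken {
			return fmt.Errorf("consistency check failed")
		}
		return nil
	})
	if err != nil {
		fmt.Printf("%s: %v\n", path, err)
		return
	}
	fmt.Printf("%s: OK\n", path)
}

func main() {
	// vault.db is the FSM database; raft/raft.db is the Raft log store.
	for _, p := range []string{"/vault/data/vault.db", "/vault/data/raft/raft.db"} {
		checkFile(p)
	}
}

If the check reports errors on all three pods, recovery presumably means restoring an earlier Raft snapshot (vault operator raft snapshot save / snapshot restore) rather than repairing the files in place.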