Cannot create vault cluster with manual unseal

Hello,

I have the following vault configuration:

ui = true
storage "raft" {
        path = "/opt/vault/data"
        retry_join {
        # 'leader_api_addr' means 'address of a possible leader node'
                leader_api_addr = "https://vault-0.node.company.internal:8200"
                leader_ca_cert_file = "/opt/vault/tls/ca.crt"
                leader_client_cert_file = "/opt/vault/tls/tls.crt"
                leader_client_key_file = "/opt/vault/tls/tls.key"
        }
        retry_join {
                leader_api_addr = "https://vault-1.node.company.internal:8200"
                leader_ca_cert_file = "/opt/vault/tls/ca.crt"
                leader_client_cert_file = "/opt/vault/tls/tls.crt"
                leader_client_key_file = "/opt/vault/tls/tls.key"
        }
        retry_join {
                leader_api_addr = "https://vault-2.node.company.internal:8200"
                leader_ca_cert_file = "/opt/vault/tls/ca.crt"
                leader_client_cert_file = "/opt/vault/tls/tls.crt"
                leader_client_key_file = "/opt/vault/tls/tls.key"
        }
}


# HTTPS listener
listener "tcp" {
  address       = "0.0.0.0:8200"
  tls_cert_file = "/opt/vault/tls/tls.crt"
  tls_key_file  = "/opt/vault/tls/tls.key"
  # tls_ca_file = "/opt/vault/tls/ca.crt"
  tls_require_and_verify_client_cert = "true"
  tls_client_ca_file = "/opt/vault/tls/ca.crt"
  tls_min_version = "tls12"
}

# HA parameters
api_addr = "https://vault-0.node.company.internal:8200"
cluster_addr = "https://vault-0.node.company.internal:8201"

For the second and third nodes (vault-1 and vault-2), the configuration is identical except for the HA parameters, which I adapt accordingly.

I’m not sure how I need to go about it. How I expect it to work is this: I start all nodes, initialize only one, and unseal it manually. At this point I expect the initial node to see the other nodes in the raft cluster. Then I should be able to unseal the other two nodes with the same recovery keys that I got from the node that I’ve initialized.

That doesn’t work, though.
This is what I keep getting from vault-1:

Aug 04 10:56:17 vault-1 vault[35447]: 2022-08-04T10:56:17.545+0300 [ERROR] core: failed to retry join raft cluster: retry=2s err="waiting for unseal keys to be supplied"
Aug 04 10:56:19 vault-1 vault[35447]: 2022-08-04T10:56:19.548+0300 [INFO]  core: security barrier not initialized
Aug 04 10:56:19 vault-1 vault[35447]: 2022-08-04T10:56:19.551+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-0.node.company.internal:8200
Aug 04 10:56:19 vault-1 vault[35447]: 2022-08-04T10:56:19.551+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-1.node.company.internal:8200
Aug 04 10:56:19 vault-1 vault[35447]: 2022-08-04T10:56:19.551+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-2.node.company.internal:8200
Aug 04 10:56:19 vault-1 vault[35447]: 2022-08-04T10:56:19.557+0300 [ERROR] core: failed to get raft challenge: leader_addr=https://vault-1.node.company.internal:8200
Aug 04 10:56:19 vault-1 vault[35447]:   error=
Aug 04 10:56:19 vault-1 vault[35447]:   | error during raft bootstrap init call: Error making API request.
Aug 04 10:56:19 vault-1 vault[35447]:   |
Aug 04 10:56:19 vault-1 vault[35447]:   | URL: PUT https://vault-1.node.company.internal:8200/v1/sys/storage/raft/bootstrap/challenge
Aug 04 10:56:19 vault-1 vault[35447]:   | Code: 503. Errors:
Aug 04 10:56:19 vault-1 vault[35447]:   |
Aug 04 10:56:19 vault-1 vault[35447]:   | * Vault is sealed
Aug 04 10:56:19 vault-1 vault[35447]:
Aug 04 10:56:19 vault-1 vault[35447]: 2022-08-04T10:56:19.558+0300 [ERROR] core: failed to retry join raft cluster: retry=2s err="waiting for unseal keys to be supplied"
Aug 04 10:56:19 vault-1 vault[35447]: 2022-08-04T10:56:19.560+0300 [ERROR] core: failed to get raft challenge: leader_addr=https://vault-2.node.company.internal:8200
Aug 04 10:56:19 vault-1 vault[35447]:   error=
Aug 04 10:56:19 vault-1 vault[35447]:   | error during raft bootstrap init call: Error making API request.
Aug 04 10:56:19 vault-1 vault[35447]:   |
Aug 04 10:56:19 vault-1 vault[35447]:   | URL: PUT https://vault-2.node.company.internal:8200/v1/sys/storage/raft/bootstrap/challenge
Aug 04 10:56:19 vault-1 vault[35447]:   | Code: 503. Errors:
Aug 04 10:56:19 vault-1 vault[35447]:   |
Aug 04 10:56:19 vault-1 vault[35447]:   | * Vault is sealed
Aug 04 10:56:19 vault-1 vault[35447]:
Aug 04 10:56:21 vault-1 vault[35447]: 2022-08-04T10:56:21.558+0300 [INFO]  core: security barrier not initialized
Aug 04 10:56:21 vault-1 vault[35447]: 2022-08-04T10:56:21.559+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-0.node.company.internal:8200
Aug 04 10:56:21 vault-1 vault[35447]: 2022-08-04T10:56:21.559+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-2.node.company.internal:8200
Aug 04 10:56:21 vault-1 vault[35447]: 2022-08-04T10:56:21.559+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-1.node.company.internal:8200
Aug 04 10:56:21 vault-1 vault[35447]: 2022-08-04T10:56:21.566+0300 [ERROR] core: failed to get raft challenge: leader_addr=https://vault-2.node.company.internal:8200
Aug 04 10:56:21 vault-1 vault[35447]:   error=
Aug 04 10:56:21 vault-1 vault[35447]:   | error during raft bootstrap init call: Error making API request.
Aug 04 10:56:21 vault-1 vault[35447]:   |
Aug 04 10:56:21 vault-1 vault[35447]:   | URL: PUT https://vault-2.node.company.internal:8200/v1/sys/storage/raft/bootstrap/challenge
Aug 04 10:56:21 vault-1 vault[35447]:   | Code: 503. Errors:
Aug 04 10:56:21 vault-1 vault[35447]:   |
Aug 04 10:56:21 vault-1 vault[35447]:   | * Vault is sealed
Aug 04 10:56:21 vault-1 vault[35447]:
Aug 04 10:56:21 vault-1 vault[35447]: 2022-08-04T10:56:21.567+0300 [ERROR] core: failed to get raft challenge: leader_addr=https://vault-1.node.company.internal:8200
Aug 04 10:56:21 vault-1 vault[35447]:   error=
Aug 04 10:56:21 vault-1 vault[35447]:   | error during raft bootstrap init call: Error making API request.
Aug 04 10:56:21 vault-1 vault[35447]:   |
Aug 04 10:56:21 vault-1 vault[35447]:   | URL: PUT https://vault-1.node.company.internal:8200/v1/sys/storage/raft/bootstrap/challenge
Aug 04 10:56:21 vault-1 vault[35447]:   | Code: 503. Errors:
Aug 04 10:56:21 vault-1 vault[35447]:   |
Aug 04 10:56:21 vault-1 vault[35447]:   | * Vault is sealed
Aug 04 10:56:21 vault-1 vault[35447]:
Aug 04 10:56:21 vault-1 vault[35447]: 2022-08-04T10:56:21.568+0300 [ERROR] core: failed to retry join raft cluster: retry=2s err="waiting for unseal keys to be supplied"
Aug 04 10:56:23 vault-1 vault[35447]: 2022-08-04T10:56:23.569+0300 [INFO]  core: security barrier not initialized
Aug 04 10:56:23 vault-1 vault[35447]: 2022-08-04T10:56:23.571+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-0.node.company.internal:8200
Aug 04 10:56:23 vault-1 vault[35447]: 2022-08-04T10:56:23.571+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-1.node.company.internal:8200
Aug 04 10:56:23 vault-1 vault[35447]: 2022-08-04T10:56:23.572+0300 [INFO]  core: attempting to join possible raft leader node: leader_addr=https://vault-2.node.company.internal:8200
Aug 04 10:56:23 vault-1 vault[35447]: 2022-08-04T10:56:23.576+0300 [ERROR] core: failed to get raft challenge: leader_addr=https://vault-1.node.company.internal:8200
Aug 04 10:56:23 vault-1 vault[35447]:   error=
Aug 04 10:56:23 vault-1 vault[35447]:   | error during raft bootstrap init call: Error making API request.
Aug 04 10:56:23 vault-1 vault[35447]:   |
Aug 04 10:56:23 vault-1 vault[35447]:   | URL: PUT https://vault-1.node.company.internal:8200/v1/sys/storage/raft/bootstrap/challenge

Any ideas how I can go around this issue?

Ok, never mind, this time it has worked. I don’t know what it is about writing posts that solves the actual problem.
Last time I remember distinctly that, when I tried unsealing the rest of the nodes, I got the following error:

failed to create cipher: crypto/aes: invalid key size 0
Or I simply got a timeout.

I have the exact same issue, and your config is almost the same as mine. Did you figure out what the problem was the last time?

I don’t remember unfortunately. What I remember vaguely was that some errors were misleading and that the root cause was different than what the errors might lead you to believe. But some scenarios might be overlapping in my mind.
Of course I would check if the key actually is right.

I can offer you my current configuration, which uses an additional vault transit key node through which I unseal the cluster. This is the config file of the first cluster node:

listener "tcp" {
  address       = "0.0.0.0:8200"
  tls_cert_file = "/opt/vault/tls/tls.crt"
  tls_key_file  = "/opt/vault/tls/tls.key"
  # tls_require_and_verify_client_cert = "true"
  # tls_client_ca_file = "/opt/vault/tls/ca.crt"
  tls_min_version = "tls12"
}

storage "raft" {
        path = "/opt/vault/data"
	        retry_join {
        # 'leader_api_addr' means 'address of a possible leader node'
                leader_api_addr = "https://omni-vault-0.node.company.lan:8200"
                leader_ca_cert_file = "/opt/vault/tls/ca.crt"
                leader_client_cert_file = "/opt/vault/tls/tls.crt"
                leader_client_key_file = "/opt/vault/tls/tls.key"
        }
	        retry_join {
        # 'leader_api_addr' means 'address of a possible leader node'
                leader_api_addr = "https://omni-vault-1.node.company.lan:8200"
                leader_ca_cert_file = "/opt/vault/tls/ca.crt"
                leader_client_cert_file = "/opt/vault/tls/tls.crt"
                leader_client_key_file = "/opt/vault/tls/tls.key"
        }
	        retry_join {
        # 'leader_api_addr' means 'address of a possible leader node'
                leader_api_addr = "https://omni-vault-2.node.company.lan:8200"
                leader_ca_cert_file = "/opt/vault/tls/ca.crt"
                leader_client_cert_file = "/opt/vault/tls/tls.crt"
                leader_client_key_file = "/opt/vault/tls/tls.key"
        }
	}

seal "transit" {
  address = "https://omni-vault-transit.node.company.lan:8200"
  disable_renewal = "false"
  key_name = "autounseal"
  mount_path = "transit/"
  tls_ca_cert = "/opt/vault/tls/ca.crt"
  tls_client_cert = "/opt/vault/tls/tls.crt"
  tls_client_key = "/opt/vault/tls/tls.key"
  tls_server_name = "omni-vault-transit.node.company.lan"
  tls_skip_verify = "false"
  token = "hvs.token"
}

# HA parameters
api_addr = "https://omni-vault-0.node.company.lan:8200"
cluster_addr = "https://omni-vault-0.node.company.lan:8201"
1 Like

Alright, I’ve identified the issue: I forgot to enable TCP port 8201 in our firewall settings.

1 Like