Hello everyone.
After enabling ACLs on my cluster, the Nomad clients are unable to reconnect to the server cluster. My cluster runs Consul 1.15.1.
The deployment was done directly on Nomad v1.5.6 (servers and clients).
The error I have is:
2023-06-06T13:36:07.414Z [WARN] client.server_mgr: no servers available
2023-06-06T13:36:07.469Z [WARN] client.server_mgr: no servers available
2023-06-06T13:36:07.487Z [WARN] client.server_mgr: no servers available
2023-06-06T13:36:07.540Z [WARN] client.server_mgr: no servers available
2023-06-06T13:36:07.540Z [ERROR] client.rpc: error performing RPC to server, deadline exceeded, cannot retry: error="no servers" rpc=Node.Register
2023-06-06T13:36:07.540Z [DEBUG] client: registration waiting on servers
2023-06-06T13:36:07.541Z [ERROR] client: error discovering nomad servers: error="client.consul: unable to query Consul datacenters: Get \"https://127.0.0.1:8501/v1/catalog/datacenters\": dial tcp 127.0.0.1:8501: connect: connection refused"
2023-06-06T13:36:07.684Z [WARN] client.server_mgr: no servers available
2023-06-06T13:36:07.765Z [WARN] client.server_mgr: no servers available
2023-06-06T13:36:07.765Z [ERROR] client.rpc: error performing RPC to server, deadline exceeded, cannot retry: error="no servers" rpc=Node.GetClientAllocs
2023-06-06T13:36:07.822Z [WARN] client.server_mgr: no servers available
2023-06-06T13:36:07.943Z [WARN] client.server_mgr: no servers available
2023-06-06T13:36:07.943Z [ERROR] client.rpc: error performing RPC to server, deadline exceeded, cannot retry: error="no servers" rpc=Node.UpdateAlloc
2023-06-06T13:36:07.943Z [ERROR] client: error updating allocations: error="no servers"
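If I am reading this correctly, the repeated "no servers available" warnings are a consequence of the line above them: the client cannot query its local Consul agent for server discovery because the HTTPS call to 127.0.0.1:8501 is refused. As a temporary check I was thinking of pointing the Nomad client at plain HTTP instead (this assumes the local Consul client agent still serves HTTP on the default port 8500, since nothing in its config disables it):
# temporary test only - assumes the local Consul agent still listens on HTTP 8500
consul {
address = "127.0.0.1:8500"
ssl = false
}
Is that a reasonable way to isolate the problem, or would it just hide it?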
My configuration files are the following:
- NOMAD - SERVER (nomad.hcl).
datacenter = "dc1"
data_dir = "/opt/nomad"
server {
enabled = true
bootstrap_expect = 3
raft_protocol = 3
}
advertise {
http = "{{GetInterfaceIP \"eth0\"}}"
rpc = "{{GetInterfaceIP \"eth0\"}}"
serf = "{{GetInterfaceIP \"eth0\"}}"
}
tls {
http = true
rpc = true
ca_file = "/etc/nomad.d/ca.pem"
cert_file = "/etc/nomad.d/erver.pem"
key_file = "/etc/nomad.d/server-key.pem"
verify_server_hostname = false
verify_https_client = false
}
acl {
enabled = true
}
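One thing I am not sure about: the Nomad servers have no consul block at all. With default_policy = "deny" on the Consul side, I assume the servers also need to reach their local Consul agent over HTTPS and present a token so they can register the "nomad" service that the clients discover. This is roughly what I would add (the token value is only a placeholder for a token I would still have to create, and it assumes the server hosts also run a Consul agent with HTTPS on 8501):
consul {
address = "127.0.0.1:8501"
ssl = true
ca_file = "/etc/nomad.d/ca.pem"
cert_file = "/etc/nomad.d/server.pem"
key_file = "/etc/nomad.d/server-key.pem"
# placeholder - SecretID of a Consul token with enough permissions to register services
token = "REPLACE-WITH-CONSUL-TOKEN"
}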
- CONSUL - SERVER (consul.hcl).
node_name = "hashi-server-dev-1"
server = true
data_dir = "/opt/consul"
bind_addr = "{{ GetInterfaceIP \"eth0\" }}"
rejoin_after_leave = true
client_addr = "0.0.0.0"
bootstrap_expect = 3
retry_join = ["provider=aws tag_key=auto-join tag_value=consul-dev"]
encrypt = "pc08zoU1C+k="
enable_local_script_checks = true
ui_config {
enabled = true
metrics_provider = "prometheus"
}
performance {
raft_multiplier = 1
}
ports {
https = 8501
}
tls {
defaults {
verify_incoming = true
verify_outgoing = true
ca_file = "/etc/consul.d/ca.pem"
cert_file = "/etc/consul.d/server.pem"
key_file = "/etc/consul.d/server-key.pem"
}
internal_rpc {
verify_server_hostname = true
}
}
acl {
enabled = true
default_policy = "deny"
down_policy = "extend-cache"
tokens {
default = "2d87f5af-c1a4-7502-2e60-6602f15886f6"
}
}
- NOMAD - CLIENT (nomad.hcl).
datacenter = "dc1"
data_dir = "/opt/nomad"
log_level = "DEBUG"
client {
enabled = true
options = {
"docker.auth.config" = "/root/.docker/config.json"
"docker.auth.helper" = "ecr-login"
}
host_network "localhost" {
interface = "lo"
}
host_volume "monolith_public_key" {
path = "/data/monolith/files/oauth-public.key"
read_only = false
}
host_volume "monolith_private_key" {
path = "/data/monolith/files/oauth-private.key"
read_only = false
}
host_volume "datadog_sock" {
path = "/var/run/docker.sock"
read_only = false
}
host_volume "datadog_proc" {
path = "/proc/"
read_only = false
}
host_volume "datadog_cgroup" {
path = "/sys/fs/cgroup/"
read_only = false
}
}
acl {
enabled = true
}
consul {
address = "127.0.0.1:8501"
ssl = true
ca_file = "/etc/nomad.d/rca.pem"
cert_file = "/etc/nomad.d/client.pem"
key_file = "/etc/nomad.d/client-key.pem"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
tls {
http = true
rpc = true
ca_file = "/etc/nomad.d/referwell-hashi-cluster-ca.pem"
cert_file = "/etc/nomad.d/referwell-hashi-cluster-client.pem"
key_file = "/etc/nomad.d/referwell-hashi-cluster-client-key.pem"
verify_server_hostname = false
verify_https_client = false
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
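A related doubt about the client config above: its consul block points at 127.0.0.1:8501 with ssl = true but has no token. The Consul agent's default token (policy consul-client, shown further down) only grants node write on the "hashi-client" prefix, so I am not sure it lets the Nomad client read the server nodes from the catalog. If a dedicated token is needed, I assume it would simply be added to the existing consul block, something like (placeholder value):
# addition to the consul block above - placeholder value
token = "REPLACE-WITH-CONSUL-TOKEN"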
- CONSUL - CLIENT (consul.hcl).
{
"encrypt" : "pc08zoU1C+k=",
"bind_addr": "{{ GetInterfaceIP \"eth0\" }}",
"node_name": "hashi-client-dev-1",
"data_dir": "/opt/consul",
"rejoin_after_leave": true,
"enable_local_script_checks": true,
"retry_join" : ["provider=aws tag_key=auto-join tag_value=consul-dev"],
"verify_incoming": true,
"verify_outgoing": true,
"verify_server_hostname": true,
"ca_file": "/etc/consul.d/ca.pem",
"cert_file": "/etc/consul.d/client.pem",
"key_file": "/etc/consul.d/client-key.pem",
"acl": {
"enabled": true,
"default_policy": "deny",
"down_policy": "extend-cache",
"tokens": {
"default": "d562fe45-faf5-dafd-7bf2-3c0368e9afc9"
}
}
}
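Comparing this with the Nomad client's consul block: Nomad is told to talk to 127.0.0.1:8501 over TLS, but this Consul client agent config does not define an HTTPS port (only the servers set ports { https = 8501 }), which would explain the connection refused in the log. If that reading is right, I assume the client agents need the same setting, roughly (shown in HCL, I would translate it into this JSON file):
# possibly missing from the client agent config above - the servers set this, the clients do not
ports {
https = 8501
}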
I have created the following policies and tokens in my Consul cluster.
# consul acl token list
AccessorID: 6eb65d08-b1b5-20b5-dec4-2b0764c24bbf
SecretID: d562fe45-faf5-dafd-7bf2-3c0368e9afc9
Description: consul-client
Local: false
Create Time: 2023-06-05 16:36:38.795775571 +0000 UTC
Policies:
74b6958c-bd28-0250-ecfe-b3f91b77d380 - consul-client
[ec2-user@ip-10-102-2-234 ~]$ consul acl policy read -id 74b6958c-bd28-0250-ecfe-b3f91b77d380
ID: 74b6958c-bd28-0250-ecfe-b3f91b77d380
Name: consul-client
Description:
Datacenters:
Rules:
agent_prefix "hashi-client" {
policy = "write"
}
node_prefix "hashi-client" {
policy = "write"
}
key_prefix "app-configs" {
policy = "read"
}
key_prefix "traefik" {
policy = "read"
}
service_prefix "" {
policy = "write"
}
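So far this is the only policy and token I have created, and it is attached as the default token of the Consul client agents. I have not created a dedicated token for the Nomad agents themselves. In case one is required, my rough guess at a policy for it would be the following (this is my own guess, not taken from documentation, so please correct me):
# guess at what the Nomad agents need: read the catalog, register their services
agent_prefix "" {
policy = "read"
}
node_prefix "" {
policy = "read"
}
service_prefix "" {
policy = "write"
}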
Please help me understand what is happening.
Thanks.