Hi,
I am integrating Consul with Nomad for service discovery.
I am using Nomad 1.1.6 and Consul 1.9.0.
Sometimes, when we add a Nomad node (client) to our cluster through an AWS Auto Scaling group, Consul registers a health check of the server type for that Nomad node instead of the client type. Because the node is not running as a server, this check fails and the node is reported as unhealthy:
HTTP GET http://0.0.0.0:4646/v1/agent/health?type=server: 500 Internal Server Error Output: {"server":{"ok":false,"message":"server not enabled"}}
The Nomad agent configuration is shown below:
name = "node-servicename-instanceid"
data_dir = "/opt/nomad/data"
datacenter = "production"
bind_addr = "0.0.0.0"
log_file = "/opt/nomad/data/logs/"
log_rotate_duration = "24h"
log_rotate_max_files = 14
advertise {
http = "x.x.x.x"
rpc = "x.x.x.x"
serf = "x.x.x.x"
}
client {
enabled = true
servers = ["x.x.x.x:4647"]
meta {
service = "roro-server"
}
node_class= "roro-server"
}
plugin "raw_exec" {
config {
enabled = true
}
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
plugin "docker" {
config {
auth {
helper = "ecr-login"
config = "/home/ubuntu/.docker/config.json"
}
infra_image = "pause-amd64:3.1"
}
}
consul {
address = "127.0.0.1:8500"
checks_use_advertise = true
server_service_name = "nomad"
client_service_name = "nomad"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
The Consul agent configuration is shown below:
{
"server": false,
"client_addr": "0.0.0.0",
"advertise_addr": "externalip",
"data_dir": "/var/consul",
"enable_local_script_checks": true,
"node_name": "node-servicename-instanceid",
"datacenter": "production-cluster",
"enable_syslog": true,
"log_file": "/var/log/consul/",
"log_rotate_duration": "24h",
"log_rotate_max_files": -1,
"acl": {
"enabled": true,
"enable_token_persistence": true,
"tokens": {
"default": "token"
}
},
"ports": {
"grpc": 8502
},
"connect": {
"enabled": true
},
"retry_join": ["ip1", "ip2", "ip3"],
"services": [
{
"name": "dnsmasq",
"token": "token",
"tags": ["dnsmasq"],
"checks": [
{
"id": "dnsmasq",
"name": "dnsmasq service check",
"args": ["/etc/consul/dnsmasq-health-check.sh"],
"interval": "10s",
"timeout": "1s"
}
]
},
{
"name": "node_exporter",
"token": "token",
"tags": ["node_exporter", "node-roro-server"],
"port": 9100,
"checks": [
{
"id": "http-check",
"http": "http://externalipservice:9100",
"interval": "30s",
"timeout": "1s"
}
]
},
{
"name": "dnsmasq_exporter",
"token": "token",
"tags": ["dnsmasq_exporter", "node-servicename"],
"port": 9153,
"checks": [
{
"id": "http-check",
"http": "http://externalipservice:9153",
"interval": "30s",
"timeout": "1s"
}
]
},
{
"name": "nomad-agent",
"token": "token",
"tags": ["nomad-agent", "node-servicename"],
"port": 4646,
"checks": [
{
"id": "health-01",
"http": "http://externalipservice:4646/v1/metrics?format=prometheus",
"interval": "2s",
"timeout": "1s"
}
]
}
]
}