Hi, I ran into a problem when trying to run a Nomad job with Consul Connect enabled, as described in "Consul Service Mesh | Nomad | HashiCorp Developer".
The connect-proxy-count-dashboard log:
[2022-09-28 07:49:45.908][1][warning][config] [./source/common/config/grpc_stream.h:196] DeltaAggregatedResources gRPC config stream closed since 312s ago: 14, upstream connect error or disconnect/reset before headers. reset reason: connection failure, transport failure reason: TLS error: 268435581:SSL routines:OPENSSL_internal:CERTIFICATE_VERIFY_FAILED
I have already tried following "Secure Nomad Jobs with Consul Service Mesh | Nomad | HashiCorp Developer", but still no luck.
Nomad client config:
# Nomad agent configuration for a client-only node (server mode is disabled below).
# Placeholder values are substituted by the templating step before deployment.

data_dir   = "/opt/nomad/data"
region     = "${nomad_region}"
datacenter = "${nomad_datacenter}"

# Logging
log_level            = "${log_level}"
log_file             = "${log_file}"
log_rotate_bytes     = "${log_rotate_bytes}"
log_rotate_duration  = "${log_rotate_duration}"
log_rotate_max_files = "${log_rotate_max_files}"

# Listen on every interface, but advertise only the private IP.
bind_addr = "0.0.0.0"

advertise {
  http = "{{ GetPrivateIP }}"
  rpc  = "{{ GetPrivateIP }}"
  serf = "{{ GetPrivateIP }}"
}

ports {
  http = 4646
  rpc  = 4647
  serf = 4648
}

server {
  enabled = false
}

client {
  enabled        = true
  node_class     = "${nomad_client_node_class}"

  # CNI plugin binaries and network configs.
  cni_path       = "/opt/cni/bin"
  cni_config_dir = "/opt/cni/config"

  # Garbage-collection tuning.
  gc_interval              = "1m"
  gc_disk_usage_threshold  = 70
  gc_inode_usage_threshold = 50
  gc_max_allocs            = 50
  gc_parallel_destroys     = 2

  # Resources and ports withheld from scheduling on this node.
  reserved {
    cpu            = 128
    memory         = 128
    disk           = 1024
    reserved_ports = "22,80,443,8200-8600"
  }
}

telemetry {
  prometheus_metrics         = true
  disable_hostname           = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
  collection_interval        = "1s"
}

acl {
  enabled = true
}

# TLS for Nomad's own HTTP and RPC endpoints.
tls {
  http                   = true
  rpc                    = true
  verify_server_hostname = true
  verify_https_client    = true
  ca_file                = "/opt/nomad/tls/ca.crt"
  cert_file              = "/opt/nomad/tls/tls.crt"
  key_file               = "/opt/nomad/tls/tls.key"
}

# Connection to the local Consul agent over TLS.
consul {
  address               = "127.0.0.1:8501"
  grpc_address          = "127.0.0.1:8502"
  auto_advertise        = true
  checks_use_advertise  = true
  server_service_name   = "${nomad_server_consul_service_name}"
  client_service_name   = "${nomad_client_consul_service_name}"
  client_auto_join      = true
  allow_unauthenticated = true

  ssl       = true
  share_ssl = true
  # NOTE(review): the Consul agent log reports "tls: unknown certificate authority"
  # on its gRPC listener. Confirm that this CA is the one that signed the
  # certificate Consul actually presents on 127.0.0.1:8502.
  ca_file    = "/opt/consul/tls/ca.crt"
  cert_file  = "/opt/consul/tls/tls.crt"
  key_file   = "/opt/consul/tls/tls.key"
  verify_ssl = true
}

vault {
  enabled         = true
  address         = "${vault_server_address}"
  ca_file         = "/opt/vault/tls/ca.crt"
  cert_file       = "/opt/vault/tls/tls.crt"
  key_file        = "/opt/vault/tls/tls.key"
  tls_server_name = "vault"
}

plugin "docker" {
  config {
    auth {
      helper = "ecr-login"
    }

    extra_labels = ["*"]

    gc {
      image       = true
      image_delay = "1h"
      container   = true

      dangling_containers {
        enabled        = true
        dry_run        = false
        period         = "5m"
        creation_grace = "5m"
      }
    }

    volumes {
      enabled = true
    }

    allow_privileged = false
  }
}

# NOTE(review): auto_encrypt with tls / ip_san looks like a Consul agent setting
# rather than a Nomad one. Confirm whether this stanza was meant for the Consul
# client config instead.
auto_encrypt {
  tls = true
  ip_san = [
    "10.11.71.215"
  ]
}

enable_debug = false
Consul client config:
# Consul agent configuration for a client (non-server) node.
# Placeholder values are substituted by the templating step before deployment.

server     = false
datacenter = "${consul_datacenter}"
data_dir   = "/opt/consul/data"
log_level  = "${log_level}"

# Listen on every interface, but advertise only the private IP.
bind_addr      = "0.0.0.0"
client_addr    = "0.0.0.0"
advertise_addr = "{{ GetPrivateIP }}"

# Gossip encryption key.
encrypt = "${gossip_encryption_key}"

# Cloud auto-join: discover the servers to join by instance tag.
retry_join = [
  "provider=\"${provider_id}\" region=\"${aws_region}\" tag_key=\"${aws_tag_key}\" tag_value=\"${aws_tag_value}\" addr_type=\"${aws_addr_type}\""
]

ports {
  serf_lan = 8301
  http     = 8500
  https    = 8501
  grpc     = 8502
  dns      = 8600
}

tls {
  # NOTE(review): no cert_file / key_file is configured here although
  # verify_incoming is true. Presumably certificates are expected from
  # auto_encrypt; confirm that stanza is present in this file.
  defaults {
    verify_outgoing = true
    verify_incoming = true
    ca_file         = "/opt/consul/tls/ca.crt"
  }

  internal_rpc {
    verify_server_hostname = true
  }
}

acl {
  enabled                  = true
  default_policy           = "deny"
  down_policy              = "extend-cache"
  enable_token_persistence = true

  # The same token is used as both the default and the agent token.
  tokens {
    default = "${consul_agent_token}"
    agent   = "${consul_agent_token}"
  }
}

# Service mesh (Connect) support.
connect {
  enabled = true
}

enable_debug = false
The ${xxxx} placeholders will be replaced with the appropriate values.
Consul agent log:
Sep 28 07:58:49 ip-10-11-71-215 consul[833730]: 2022-09-28T07:58:49.886Z [WARN] agent: [core]grpc: Server.Serve failed to complete security handshake from "127.0.0.1:44012": remote error: tls: unknown certificate authority
Sep 28 07:58:52 ip-10-11-71-215 consul[833730]: 2022-09-28T07:58:52.004Z [WARN] agent: Check socket connection failed: check=service:_nomad-task-a6018005-b908-c4e8-bdfd-84dbe219a93d-group-dashboard-count-dashboard-http-sidecar-proxy:2 error="dial tcp 10.11.71.215:23887: connect: connection refused"
Sep 28 07:58:52 ip-10-11-71-215 consul[833730]: 2022-09-28T07:58:52.004Z [WARN] agent: Check is now critical: check=service:_nomad-task-a6018005-b908-c4e8-bdfd-84dbe219a93d-group-dashboard-count-dashboard-http-sidecar-proxy:2
Sep 28 07:58:52 ip-10-11-71-215 consul[833730]: 2022-09-28T07:58:52.396Z [WARN] agent: Check socket connection failed: check=service:_nomad-task-d793db41-d237-839f-d6be-fe48550338e7-group-api-count-api-9001-sidecar-proxy:2 error="dial tcp 10.11.71.215:26284: connect: connection refused"
Sep 28 07:58:52 ip-10-11-71-215 consul[833730]: 2022-09-28T07:58:52.396Z [WARN] agent: Check is now critical: check=service:_nomad-task-d793db41-d237-839f-d6be-fe48550338e7-group-api-count-api-9001-sidecar-proxy:2
Job spec:
# "countdash" job: a count-api service and a count-dashboard service, each in its
# own bridge-networked group with a Connect sidecar.
job "countdash" {
  datacenters = ["us-west-2"]
  type        = "service"

  # Backend API group; its service is registered on port 9001.
  group "api" {
    network {
      mode = "bridge"
    }

    service {
      name = "count-api"
      port = "9001"

      connect {
        sidecar_service {}

        sidecar_task {
          config {
            auth_soft_fail = true
          }
        }
      }
    }

    task "web" {
      driver = "docker"

      config {
        auth_soft_fail = true
        image          = "hashicorpnomad/counter-api:v1"
      }
    }
  }

  # Dashboard group; reaches count-api through a local upstream on port 8080.
  group "dashboard" {
    network {
      mode = "bridge"

      port "http" {
        to = 9002
      }
    }

    service {
      name = "count-dashboard"
      port = "http"

      connect {
        sidecar_service {
          proxy {
            upstreams {
              destination_name = "count-api"
              local_bind_port  = 8080
            }
          }
        }

        sidecar_task {
          config {
            auth_soft_fail = true
          }
        }
      }
    }

    task "dashboard" {
      driver = "docker"

      # Point the dashboard at the count-api upstream address.
      env {
        COUNTING_SERVICE_URL = "http://${NOMAD_UPSTREAM_ADDR_count_api}"
      }

      config {
        auth_soft_fail = true
        image          = "hashicorpnomad/counter-dashboard:v1"
      }
    }
  }
}
Can anyone point me to what I should do to fix the above problem? Thanks!