Hello,
I finally managed to have all my logs and nomad metrics through vector running in a docker container (via nomad).
I copy/paste below my Nomad client config (here it is also set up as a server) so you can see what you need to mount. You’ll also find my Vector job config file. I hope it will help someone.
Some notes :
- For journald logs to be accessible from the vector container, you need to mount /var/log/journal and give your container the same machine-id. This means also mounting the /etc/machine-id file (see below)
- To access the nomad host from the vector container, you may need to add this extra arg (--add-host host.docker.internal:host-gateway) to the docker config
- Some nomad metrics are not accessible (depends on your system …). In my case, nomad_client_allocs_memory_rss is not accessible.
PS :
- I use terraform so some variables are passed/replaced by terraform
- My host machine is running Debian 11.6
My nomad.hcl config
# Full configuration options can be found at https://www.nomadproject.io/docs/configuration
#datacenter = "dc1"
data_dir = "/opt/nomad/data"
# Bind on tailscale interface
bind_addr = "{{ GetInterfaceIP \"tailscale0\" }}"
# See https://developer.hashicorp.com/nomad/tutorials/access-control/access-control-bootstrap
#acl {
# enabled = true
#}
# Expose metrics in Prometheus format at /v1/metrics?format=prometheus so the
# prometheus_scrape source in the Vector job below can collect them.
telemetry {
collection_interval = "15s"
# Keep the host name out of metric keys; identify hosts via labels instead.
disable_hostname = true
prometheus_metrics = true
# Emit per-allocation and per-node resource metrics.
publish_allocation_metrics = true
publish_node_metrics = true
}
# ${server}, ${client}, ${bootstrap_expect} and ${reserved_ports} are
# substituted by Terraform before this file is installed on the host.
server {
enabled = ${server?"true":"false"}
default_scheduler_config {
memory_oversubscription_enabled = true
}
bootstrap_expect=${bootstrap_expect}
}
client {
enabled = ${client?"true":"false"}
host_network "tailscale" {
interface = "tailscale0"
reserved_ports = "${reserved_ports}"
}
# Used for docker logs
# Read-only bind of the Docker socket so the Vector docker_logs source can
# query the Docker API.
host_volume "docker-sock-ro" {
path = "/var/run/docker.sock"
read_only = true
}
# Used for host systemd logs
host_volume "journald-ro" {
path = "/var/log/journal"
read_only = true
}
# /etc/machine-id is a single file (not a directory); the container needs the
# host's machine-id to locate its journal under /var/log/journal.
host_volume "machineid-ro" {
path = "/etc/machine-id"
read_only = true
}
}
plugin "docker" {
config {
# extra Docker labels to be set by Nomad on each Docker container with the appropriate value
# These labels feed the [sinks.loki.labels] section of the Vector config below.
extra_labels = ["job_name", "task_group_name", "task_name", "namespace", "node_name"]
}
}
/*consul {
address = "{{ GetInterfaceIP \"tailscale0\" }}:8500"
}*/
My vector job file (passed to the nomad_job terraform resource)
# System job: Nomad runs one Vector allocation on every eligible client node.
job "vector" {
datacenters = ["dc1"]
# system job, runs on all nodes
type = "system"
update {
min_healthy_time = "10s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
}
group "vector" {
count = 1
restart {
attempts = 3
interval = "10m"
delay = "30s"
mode = "fail"
}
# docker socket volume
# These three volumes reference the host_volume stanzas declared in the
# Nomad client config.
volume "docker-sock" {
type = "host"
source = "docker-sock-ro"
read_only = true
}
volume "journald" {
type = "host"
source = "journald-ro"
read_only = true
}
volume "machineid" {
type = "host"
source = "machineid-ro"
read_only = true
}
ephemeral_disk {
size = 500 # 500 MB
sticky = true
}
task "vector" {
driver = "docker"
config {
image = "timberio/vector:0.26.0-debian"
}
# docker socket volume mount
volume_mount {
volume = "docker-sock"
destination = "/var/run/docker.sock"
read_only = true
}
volume_mount {
volume = "journald"
destination = "/var/log/journal"
read_only = true
}
# Sharing the host's machine-id lets journald inside the container find
# the host journal directory mounted above.
volume_mount {
volume = "machineid"
destination = "/etc/machine-id"
read_only = true
}
# Vector won't start unless the sinks(backends) configured are healthy
env {
VECTOR_CONFIG = "local/vector.toml"
VECTOR_REQUIRE_HEALTHY = "true"
}
# resource limits are a good idea because you don't want your log collection to consume all resources available
resources {
cpu = 500 # MHz
memory = 256 # MB
}
# template with Vector's configuration
# change_mode "signal" + SIGHUP makes Vector reload the rendered config
# in place when the template output changes, instead of restarting the task.
template {
destination = "local/vector.toml"
change_mode = "signal"
change_signal = "SIGHUP"
# overriding the delimiters to [[ ]] to avoid conflicts with Vector's native templating, which also uses {{ }}
left_delimiter = "[["
right_delimiter = "]]"
data=<<EOH
# Secrets (Grafana Cloud credentials) are read from the Nomad variable at
# nomad/jobs/vector/vector/vector (Nomad native variables, available since 1.4).
[[- with nomadVar "nomad/jobs/vector/vector/vector" -]]
data_dir = "alloc/data/"
[api]
enabled = false
[sources.host_journald_logs]
type = "journald"
current_boot_only = true
since_now = true
include_units = []
# Warning and above
include_matches.PRIORITY = [ "0", "1", "2", "3", "4" ]
[sources.logs]
type = "docker_logs"
[transforms.apps_logs]
type = "remap"
inputs = ["logs"]
# NOTE(review): parse_json! aborts on non-JSON messages — this assumes all
# app containers emit JSON logs; confirm, or handle the parse error in VRL.
source = ".message = parse_json!(.message)"
[sources.nomad_host_metrics]
type = "prometheus_scrape"
# NOTE(review): Nomad's HTTP API listens on port 4646 by default — confirm
# the nomad_host_tailnet_ip Terraform variable already includes the port.
endpoints = [ "http://${nomad_host_tailnet_ip}/v1/metrics?format=prometheus" ]
scrape_interval_secs = 15
instance_tag = "instance"
endpoint_tag = "endpoint"
# In dev, additionally mirror all events to the container's stdout.
[[ if eq "${environment}" "dev" ]]
[sinks.out]
type = "console"
inputs = [ "apps_logs", "host_journald_logs", "nomad_host_metrics" ]
encoding.codec = "json"
[[ end ]]
[sinks.prometheus]
type = "prometheus_remote_write"
inputs = [ "nomad_host_metrics" ]
endpoint = "https://prometheus-prod-01-eu-west-0.grafana.net/api/prom/push"
healthcheck.enabled = false
auth.strategy = "basic"
auth.user = "[[.prometheus_user]]"
auth.password = "[[.prometheus_password]]"
[sinks.loki]
type = "loki"
inputs = ["apps_logs", "host_journald_logs"]
endpoint = "https://[[.loki_user]]:[[.loki_password]]@logs-prod-eu-west-0.grafana.net"
compression = "snappy"
encoding.codec = "json"
healthcheck.enabled = true
# remove fields that have been converted to labels to avoid having the field twice
remove_label_fields = true
# Labels are built from the Docker labels Nomad sets via extra_labels in the
# docker plugin config; the backslash-quote escapes are TOML string escapes.
[sinks.loki.labels]
# See https://vector.dev/docs/reference/vrl/expressions/#path-example-quoted-path
job = "{{label.\"com.hashicorp.nomad.job_name\" }}"
task = "{{label.\"com.hashicorp.nomad.task_name\" }}"
group = "{{label.\"com.hashicorp.nomad.task_group_name\" }}"
#namespace = "{{label.\"com.hashicorp.nomad.namespace\" }}"
node = "{{label.\"com.hashicorp.nomad.node_name\" }}"
correlation_id = "{{ message.requestId }}"
[[- end -]]
EOH
}
kill_timeout = "30s"
}
}
}
Regarding the Nomad dashboards, I started with the Nomad integration provided by Grafana and made some adjustments.
I did not share here my HaProxy and apps logs/metrics but you get the idea.
Hope it helps,
Best regards,
Brahim

