I ran into the following problem with a job that uses CSI volumes. Whenever the container configuration is changed and the job is restarted, Elasticsearch (ES) is unable to reuse the previous volume because of authentication issues. Are there any parameters that would allow it to reuse the previous data?
I suspect the previous data cannot be used because the container information has changed. Is there any configuration in Nomad that makes the newly started container carry the same information as the previous container, so that the previous data can be reused?
job es-test {
# Job-level placement settings.
namespace   = "elasticsearch"
datacenters = ["dc1"]
type        = "service"

# Update policy shared by every group in this job: one allocation is
# updated at a time, health is judged from the service checks, and a
# failed deployment is reverted automatically. No canaries are used.
update {
  max_parallel     = 1
  canary           = 0
  health_check     = "checks"
  min_healthy_time = "30s"
  healthy_deadline = "5m"
  auto_revert      = true
  stagger          = "30s"
}
##################################### master-0 ############################################
group "master-0" {
  # Master-only Elasticsearch node (node.master = true, data/ingest = false).
  # This group lists its own transport address in cluster.initial_master_nodes;
  # the other groups in this job look that address up through the
  # "es-test-master-0" Consul service registered at the bottom of the task.
  count = 1

  restart {
    attempts = 3
    delay    = "30s"
    interval = "5m"
    mode     = "fail"
  }

  network {
    dns {
      servers = ["10.103.1.11"]
    }

    # HTTP port, passed to Elasticsearch as http.port / http.publish_port.
    port "request" {
      static = 19202
    }

    # Transport port, passed as transport.tcp.port / transport.publish_port.
    # The seed_hosts entries in elasticsearch.yml hard-code the same number.
    port "communication" {
      static = 19203
    }
  }

  task "elasticsearch" {
    driver       = "docker"
    kill_timeout = "300s"
    kill_signal  = "SIGTERM"

    env {
      LOG4J_FORMAT_MSG_NO_LOOKUPS = true
      #ES_TMPDIR = "/usr/share/elasticsearch/temp"
    }

    # elasticsearch.yml, rendered into the task's local/ directory and
    # bind-mounted over the image's config file by config.volumes below.
    # The heredoc body starts at column 0 on purpose: "<<EOF" keeps leading
    # whitespace, and the YAML nesting is significant.
    template {
      data = <<EOF
cluster:
  name: {{ env "NOMAD_JOB_NAME" }}
  publish:
    timeout: 300s
  join:
    timeout: 300s
  initial_master_nodes:
    - {{ env "NOMAD_IP_communication" }}:{{ env "NOMAD_HOST_PORT_communication" }}
node:
  name: {{ env "NOMAD_JOB_NAME" }}-{{ env "NOMAD_GROUP_NAME" }}
  master: true
  data: false
  ingest: false
network:
  host: 0.0.0.0
discovery:
  seed_hosts:
    - es-test-master-0.service.consul:19203
    - es-test-master-1.service.consul:19203
    - es-test-master-2.service.consul:19203
  zen:
    ping_timeout: 120s
    fd:
      ping_interval: 120s
      ping_timeout: 120s
      ping_retries: 5
path:
  data:
    - /usr/share/elasticsearch/data
  logs: /usr/share/elasticsearch/log
bootstrap.memory_lock: true
indices.query.bool.max_clause_count: 10000
EOF

      destination = "local/elasticsearch.yml"
    }

    # jvm.options, rendered next to elasticsearch.yml.
    # NOTE(review): ${ES_TMPDIR} is kept verbatim although the env entry above
    # is commented out - presumably it is resolved by the Elasticsearch
    # launcher inside the image; confirm.
    # The ErrorFile line uses the JVM's %p (process id) placeholder; the
    # previous text "%!p(MISSING)" was a printf-mangled copy of it.
    template {
      data = <<EOF
-Xms10g
-Xmx10g
8-13:-XX:+UseConcMarkSweepGC
8-13:-XX:CMSInitiatingOccupancyFraction=75
8-13:-XX:+UseCMSInitiatingOccupancyOnly
14-:-XX:+UseG1GC
-Djava.io.tmpdir=${ES_TMPDIR}
-XX:+HeapDumpOnOutOfMemoryError
-XX:HeapDumpPath=data
-XX:ErrorFile=logs/hs_err_pid%p.log
8:-XX:+PrintGCDetails
8:-XX:+PrintGCDateStamps
8:-XX:+PrintTenuringDistribution
8:-XX:+PrintGCApplicationStoppedTime
8:-Xloggc:logs/gc.log
8:-XX:+UseGCLogFileRotation
8:-XX:NumberOfGCLogFiles=32
8:-XX:GCLogFileSize=64m
9-:-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m
EOF

      destination = "local/jvm.options"
    }

    config {
      image      = "elasticsearch:7.8.1"
      force_pull = false

      # Replace the image's stock config files with the rendered templates.
      volumes = [
        "./local/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml",
        "./local/jvm.options:/usr/share/elasticsearch/config/jvm.options"
      ]

      command = "bin/elasticsearch"

      # Publish the Nomad-assigned address and ports so other nodes and
      # clients reach this node through the host mapping.
      args = [
        "-Enetwork.publish_host=${NOMAD_IP_request}",
        "-Ehttp.publish_port=${NOMAD_HOST_PORT_request}",
        "-Ehttp.port=${NOMAD_PORT_request}",
        "-Etransport.publish_port=${NOMAD_HOST_PORT_communication}",
        "-Etransport.tcp.port=${NOMAD_PORT_communication}"
      ]

      ports = [
        "request",
        "communication"
      ]

      # memlock = -1 accompanies bootstrap.memory_lock: true in elasticsearch.yml.
      ulimit {
        memlock = "-1"
        nofile  = "65536"
        nproc   = "65536"
      }
    }

    resources {
      cpu    = 25600
      memory = 16484
    }

    # HTTP endpoint, registered under the job name.
    service {
      name = "${NOMAD_JOB_NAME}"
      port = "request"

      check {
        name     = "rest-tcp"
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }

      check {
        name     = "rest-http"
        type     = "http"
        path     = "/"
        interval = "5s"
        timeout  = "4s"
      }
    }

    # Transport endpoint; this name is what seed_hosts and the other groups'
    # prestart tasks and templates refer to.
    service {
      name = "es-test-master-0"
      port = "communication"

      check {
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }
    }
  }
}
##################################### master-1 ############################################
group "master-1" {
  # Master-only Elasticsearch node (node.master = true, data/ingest = false).
  # It waits for the "es-test-master-0" service to resolve before starting and
  # takes cluster.initial_master_nodes from that service's Consul entry.
  count = 1

  restart {
    attempts = 3
    delay    = "30s"
    interval = "5m"
    mode     = "fail"
  }

  network {
    dns {
      servers = ["10.103.1.11"]
    }

    # HTTP port, passed to Elasticsearch as http.port / http.publish_port.
    port "request" {
      static = 19202
    }

    # Transport port, passed as transport.tcp.port / transport.publish_port.
    port "communication" {
      static = 19203
    }
  }

  # Prestart (non-sidecar) task: loops until the DNS name of master-0's
  # transport service resolves, then exits so the main task can start.
  task "await-es-master-0-comm" {
    driver = "docker"

    config {
      image        = "busybox:1.28"
      command      = "sh"
      args         = ["-c", "echo -n 'Waiting for service'; until nslookup es-test-master-0.service.consul 2>&1 >/dev/null; do echo '.'; sleep 2; done"]
      network_mode = "host"
    }

    resources {
      cpu    = 200
      memory = 128
    }

    lifecycle {
      hook    = "prestart"
      sidecar = false
    }
  }

  task "elasticsearch" {
    driver       = "docker"
    kill_timeout = "300s"
    kill_signal  = "SIGTERM"

    env {
      LOG4J_FORMAT_MSG_NO_LOOKUPS = true
      #ES_TMPDIR = "/usr/share/elasticsearch/temp"
    }

    # elasticsearch.yml, rendered into the task's local/ directory and
    # bind-mounted over the image's config file by config.volumes below.
    # The heredoc body starts at column 0 on purpose: "<<EOF" keeps leading
    # whitespace, and the YAML nesting is significant.
    template {
      data = <<EOF
cluster:
  name: {{ env "NOMAD_JOB_NAME" }}
  publish:
    timeout: 300s
  join:
    timeout: 300s
  initial_master_nodes:
    - {{ range service "es-test-master-0" }}{{ .Address }}:{{ .Port }}{{ end }}
node:
  name: {{ env "NOMAD_JOB_NAME" }}-{{ env "NOMAD_GROUP_NAME" }}
  master: true
  data: false
  ingest: false
network:
  host: 0.0.0.0
discovery:
  seed_hosts:
    - es-test-master-0.service.consul:19203
    - es-test-master-1.service.consul:19203
    - es-test-master-2.service.consul:19203
  zen:
    ping_timeout: 120s
    fd:
      ping_interval: 120s
      ping_timeout: 120s
      ping_retries: 5
path:
  data:
    - /usr/share/elasticsearch/data
  logs: /usr/share/elasticsearch/log
bootstrap.memory_lock: true
indices.query.bool.max_clause_count: 10000
EOF

      destination = "local/elasticsearch.yml"
    }

    # jvm.options, rendered next to elasticsearch.yml.
    # NOTE(review): ${ES_TMPDIR} is kept verbatim although the env entry above
    # is commented out - presumably it is resolved by the Elasticsearch
    # launcher inside the image; confirm.
    # The ErrorFile line uses the JVM's %p (process id) placeholder; the
    # previous text "%!p(MISSING)" was a printf-mangled copy of it.
    template {
      data = <<EOF
-Xms10g
-Xmx10g
8-13:-XX:+UseConcMarkSweepGC
8-13:-XX:CMSInitiatingOccupancyFraction=75
8-13:-XX:+UseCMSInitiatingOccupancyOnly
14-:-XX:+UseG1GC
-Djava.io.tmpdir=${ES_TMPDIR}
-XX:+HeapDumpOnOutOfMemoryError
-XX:HeapDumpPath=data
-XX:ErrorFile=logs/hs_err_pid%p.log
8:-XX:+PrintGCDetails
8:-XX:+PrintGCDateStamps
8:-XX:+PrintTenuringDistribution
8:-XX:+PrintGCApplicationStoppedTime
8:-Xloggc:logs/gc.log
8:-XX:+UseGCLogFileRotation
8:-XX:NumberOfGCLogFiles=32
8:-XX:GCLogFileSize=64m
9-:-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m
EOF

      destination = "local/jvm.options"
    }

    config {
      image      = "elasticsearch:7.8.1"
      force_pull = false

      # Replace the image's stock config files with the rendered templates.
      volumes = [
        "./local/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml",
        "./local/jvm.options:/usr/share/elasticsearch/config/jvm.options"
      ]

      command = "bin/elasticsearch"

      # Publish the Nomad-assigned address and ports so other nodes and
      # clients reach this node through the host mapping.
      args = [
        "-Enetwork.publish_host=${NOMAD_IP_request}",
        "-Ehttp.publish_port=${NOMAD_HOST_PORT_request}",
        "-Ehttp.port=${NOMAD_PORT_request}",
        "-Etransport.publish_port=${NOMAD_HOST_PORT_communication}",
        "-Etransport.tcp.port=${NOMAD_PORT_communication}"
      ]

      ports = [
        "request",
        "communication"
      ]

      # memlock = -1 accompanies bootstrap.memory_lock: true in elasticsearch.yml.
      ulimit {
        memlock = "-1"
        nofile  = "65536"
        nproc   = "65536"
      }
    }

    resources {
      cpu    = 25600
      memory = 16484
    }

    # HTTP endpoint, registered under the job name.
    service {
      name = "${NOMAD_JOB_NAME}"
      port = "request"

      check {
        name     = "rest-tcp"
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }

      check {
        name     = "rest-http"
        type     = "http"
        path     = "/"
        interval = "5s"
        timeout  = "4s"
      }
    }

    # Transport endpoint; seed_hosts refers to this name.
    service {
      name = "es-test-master-1"
      port = "communication"

      check {
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }
    }
  }
}
##################################### master-2 ############################################
group "master-2" {
  # Master-only Elasticsearch node (node.master = true, data/ingest = false).
  # It waits for the "es-test-master-0" service to resolve before starting and
  # takes cluster.initial_master_nodes from that service's Consul entry.
  count = 1

  restart {
    attempts = 3
    delay    = "30s"
    interval = "5m"
    mode     = "fail"
  }

  network {
    dns {
      servers = ["10.103.1.11"]
    }

    # HTTP port, passed to Elasticsearch as http.port / http.publish_port.
    port "request" {
      static = 19202
    }

    # Transport port, passed as transport.tcp.port / transport.publish_port.
    port "communication" {
      static = 19203
    }
  }

  # Prestart (non-sidecar) task: loops until the DNS name of master-0's
  # transport service resolves, then exits so the main task can start.
  task "await-es-master-0-comm" {
    driver = "docker"

    config {
      image        = "busybox:1.28"
      command      = "sh"
      args         = ["-c", "echo -n 'Waiting for service'; until nslookup es-test-master-0.service.consul 2>&1 >/dev/null; do echo '.'; sleep 2; done"]
      network_mode = "host"
    }

    resources {
      cpu    = 200
      memory = 128
    }

    lifecycle {
      hook    = "prestart"
      sidecar = false
    }
  }

  task "elasticsearch" {
    driver       = "docker"
    kill_timeout = "300s"
    kill_signal  = "SIGTERM"

    env {
      LOG4J_FORMAT_MSG_NO_LOOKUPS = true
      #ES_TMPDIR = "/usr/share/elasticsearch/temp"
    }

    # elasticsearch.yml, rendered into the task's local/ directory and
    # bind-mounted over the image's config file by config.volumes below.
    # The heredoc body starts at column 0 on purpose: "<<EOF" keeps leading
    # whitespace, and the YAML nesting is significant.
    template {
      data = <<EOF
cluster:
  name: {{ env "NOMAD_JOB_NAME" }}
  publish:
    timeout: 300s
  join:
    timeout: 300s
  initial_master_nodes:
    - {{ range service "es-test-master-0" }}{{ .Address }}:{{ .Port }}{{ end }}
node:
  name: {{ env "NOMAD_JOB_NAME" }}-{{ env "NOMAD_GROUP_NAME" }}
  master: true
  data: false
  ingest: false
network:
  host: 0.0.0.0
discovery:
  seed_hosts:
    - es-test-master-0.service.consul:19203
    - es-test-master-1.service.consul:19203
    - es-test-master-2.service.consul:19203
  zen:
    ping_timeout: 120s
    fd:
      ping_interval: 120s
      ping_timeout: 120s
      ping_retries: 5
path:
  data:
    - /usr/share/elasticsearch/data
  logs: /usr/share/elasticsearch/log
bootstrap.memory_lock: true
indices.query.bool.max_clause_count: 10000
EOF

      destination = "local/elasticsearch.yml"
    }

    # jvm.options, rendered next to elasticsearch.yml.
    # NOTE(review): ${ES_TMPDIR} is kept verbatim although the env entry above
    # is commented out - presumably it is resolved by the Elasticsearch
    # launcher inside the image; confirm.
    # The ErrorFile line uses the JVM's %p (process id) placeholder; the
    # previous text "%!p(MISSING)" was a printf-mangled copy of it.
    template {
      data = <<EOF
-Xms10g
-Xmx10g
8-13:-XX:+UseConcMarkSweepGC
8-13:-XX:CMSInitiatingOccupancyFraction=75
8-13:-XX:+UseCMSInitiatingOccupancyOnly
14-:-XX:+UseG1GC
-Djava.io.tmpdir=${ES_TMPDIR}
-XX:+HeapDumpOnOutOfMemoryError
-XX:HeapDumpPath=data
-XX:ErrorFile=logs/hs_err_pid%p.log
8:-XX:+PrintGCDetails
8:-XX:+PrintGCDateStamps
8:-XX:+PrintTenuringDistribution
8:-XX:+PrintGCApplicationStoppedTime
8:-Xloggc:logs/gc.log
8:-XX:+UseGCLogFileRotation
8:-XX:NumberOfGCLogFiles=32
8:-XX:GCLogFileSize=64m
9-:-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m
EOF

      destination = "local/jvm.options"
    }

    config {
      image      = "elasticsearch:7.8.1"
      force_pull = false

      # Replace the image's stock config files with the rendered templates.
      volumes = [
        "./local/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml",
        "./local/jvm.options:/usr/share/elasticsearch/config/jvm.options"
      ]

      command = "bin/elasticsearch"

      # Publish the Nomad-assigned address and ports so other nodes and
      # clients reach this node through the host mapping.
      args = [
        "-Enetwork.publish_host=${NOMAD_IP_request}",
        "-Ehttp.publish_port=${NOMAD_HOST_PORT_request}",
        "-Ehttp.port=${NOMAD_PORT_request}",
        "-Etransport.publish_port=${NOMAD_HOST_PORT_communication}",
        "-Etransport.tcp.port=${NOMAD_PORT_communication}"
      ]

      ports = [
        "request",
        "communication"
      ]

      # memlock = -1 accompanies bootstrap.memory_lock: true in elasticsearch.yml.
      ulimit {
        memlock = "-1"
        nofile  = "65536"
        nproc   = "65536"
      }
    }

    resources {
      cpu    = 25600
      memory = 16484
    }

    # HTTP endpoint, registered under the job name.
    service {
      name = "${NOMAD_JOB_NAME}"
      port = "request"

      check {
        name     = "rest-tcp"
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }

      check {
        name     = "rest-http"
        type     = "http"
        path     = "/"
        interval = "5s"
        timeout  = "4s"
      }
    }

    # Transport endpoint; seed_hosts refers to this name.
    service {
      name = "es-test-master-2"
      port = "communication"

      check {
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }
    }
  }
}
##################################### data-0 ############################################
group "data-0" {
  # Data + ingest Elasticsearch node (node.master = false). Index data and
  # logs are written under /srv, which is the mount point of the CSI volume
  # "data0-100GB" claimed below.
  count = 1

  restart {
    attempts = 3
    delay    = "30s"
    interval = "5m"
    mode     = "fail"
  }

  network {
    dns {
      servers = ["10.103.1.11"]
    }

    # HTTP port, passed to Elasticsearch as http.port / http.publish_port.
    port "request" {
      static = 29202
    }

    # Transport port, passed as transport.tcp.port / transport.publish_port.
    port "communication" {
      static = 29203
    }
  }

  # CSI volume claim; mounted into the elasticsearch task via volume_mount.
  volume "ceph-volume" {
    type            = "csi"
    read_only       = false
    source          = "data0-100GB"
    access_mode     = "single-node-writer"
    attachment_mode = "file-system"
  }

  # Prestart (non-sidecar) task: loops until the DNS name of master-0's
  # transport service resolves, then exits so the main task can start.
  task "await-es-master-0-comm" {
    driver = "docker"

    config {
      image        = "busybox:1.28"
      command      = "sh"
      args         = ["-c", "echo -n 'Waiting for service'; until nslookup es-test-master-0.service.consul 2>&1 >/dev/null; do echo '.'; sleep 2; done"]
      network_mode = "host"
    }

    resources {
      cpu    = 200
      memory = 128
    }

    lifecycle {
      hook    = "prestart"
      sidecar = false
    }
  }

  task "elasticsearch" {
    driver       = "docker"
    kill_timeout = "300s"
    kill_signal  = "SIGTERM"

    # path.data / path.logs in elasticsearch.yml point below this mount.
    volume_mount {
      volume      = "ceph-volume"
      destination = "/srv"
      read_only   = false
    }

    env {
      #ES_TMPDIR = "/usr/share/elasticsearch/temp"
      LOG4J_FORMAT_MSG_NO_LOOKUPS = true
    }

    # elasticsearch.yml, rendered into the task's local/ directory and
    # bind-mounted over the image's config file by config.volumes below.
    # The heredoc body starts at column 0 on purpose: "<<EOF" keeps leading
    # whitespace, and the YAML nesting is significant.
    template {
      data = <<EOF
cluster:
  name: {{ env "NOMAD_JOB_NAME" }}
  publish:
    timeout: 300s
  join:
    timeout: 300s
  initial_master_nodes:
    - {{ range service "es-test-master-0" }}{{ .Address }}:{{ .Port }}{{ end }}
node:
  name: {{ env "NOMAD_JOB_NAME" }}-{{ env "NOMAD_GROUP_NAME" }}
  master: false
  data: true
  ingest: true
network:
  host: 0.0.0.0
discovery:
  seed_hosts:
    - es-test-master-0.service.consul:19203
    - es-test-master-1.service.consul:19203
    - es-test-master-2.service.consul:19203
  zen:
    ping_timeout: 120s
    fd:
      ping_interval: 120s
      ping_timeout: 120s
      ping_retries: 5
path:
  data:
    - /srv/es-test_data-0/data
  logs: /srv/es-test_data-0/log
bootstrap.memory_lock: true
indices.query.bool.max_clause_count: 10000
EOF

      destination = "local/elasticsearch.yml"
    }

    # jvm.options, rendered next to elasticsearch.yml.
    # NOTE(review): ${ES_TMPDIR} is kept verbatim although the env entry above
    # is commented out - presumably it is resolved by the Elasticsearch
    # launcher inside the image; confirm.
    # The ErrorFile line uses the JVM's %p (process id) placeholder; the
    # previous text "%!p(MISSING)" was a printf-mangled copy of it.
    template {
      data = <<EOF
-Xms10g
-Xmx10g
8-13:-XX:+UseConcMarkSweepGC
8-13:-XX:CMSInitiatingOccupancyFraction=75
8-13:-XX:+UseCMSInitiatingOccupancyOnly
14-:-XX:+UseG1GC
-Djava.io.tmpdir=${ES_TMPDIR}
-XX:+HeapDumpOnOutOfMemoryError
-XX:HeapDumpPath=data
-XX:ErrorFile=logs/hs_err_pid%p.log
8:-XX:+PrintGCDetails
8:-XX:+PrintGCDateStamps
8:-XX:+PrintTenuringDistribution
8:-XX:+PrintGCApplicationStoppedTime
8:-Xloggc:logs/gc.log
8:-XX:+UseGCLogFileRotation
8:-XX:NumberOfGCLogFiles=32
8:-XX:GCLogFileSize=64m
9-:-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m
EOF

      destination = "local/jvm.options"
    }

    config {
      image      = "elasticsearch:7.8.1"
      force_pull = false

      # Replace the image's stock config files with the rendered templates.
      volumes = [
        "./local/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml",
        "./local/jvm.options:/usr/share/elasticsearch/config/jvm.options"
      ]

      command = "bin/elasticsearch"

      # Publish the Nomad-assigned address and ports so other nodes and
      # clients reach this node through the host mapping.
      args = [
        "-Enetwork.publish_host=${NOMAD_IP_request}",
        "-Ehttp.publish_port=${NOMAD_HOST_PORT_request}",
        "-Ehttp.port=${NOMAD_PORT_request}",
        "-Etransport.publish_port=${NOMAD_HOST_PORT_communication}",
        "-Etransport.tcp.port=${NOMAD_PORT_communication}"
      ]

      ports = [
        "request",
        "communication"
      ]

      # memlock = -1 accompanies bootstrap.memory_lock: true in elasticsearch.yml.
      ulimit {
        memlock = "-1"
        nofile  = "65536"
        nproc   = "65536"
      }
    }

    resources {
      cpu    = 25600
      memory = 16484
    }

    # HTTP endpoint, registered under the job name.
    service {
      name = "${NOMAD_JOB_NAME}"
      port = "request"

      check {
        name     = "rest-tcp"
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }

      check {
        name     = "rest-http"
        type     = "http"
        path     = "/"
        interval = "5s"
        timeout  = "4s"
      }
    }

    # Transport endpoint; the same service name is used by every data group.
    service {
      name = "es-data-comm"
      port = "communication"

      check {
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }
    }
  }
}
##################################### data-1 ############################################
group "data-1" {
  # Data + ingest Elasticsearch node (node.master = false). Index data and
  # logs are written under /srv, which is the mount point of the CSI volume
  # "data1-100GB" claimed below.
  count = 1

  restart {
    attempts = 3
    delay    = "30s"
    interval = "5m"
    mode     = "fail"
  }

  network {
    dns {
      servers = ["10.103.1.11"]
    }

    # HTTP port, passed to Elasticsearch as http.port / http.publish_port.
    port "request" {
      static = 29202
    }

    # Transport port, passed as transport.tcp.port / transport.publish_port.
    port "communication" {
      static = 29203
    }
  }

  # CSI volume claim; mounted into the elasticsearch task via volume_mount.
  volume "ceph-volume" {
    type            = "csi"
    read_only       = false
    source          = "data1-100GB"
    access_mode     = "single-node-writer"
    attachment_mode = "file-system"
  }

  # Prestart (non-sidecar) task: loops until the DNS name of master-0's
  # transport service resolves, then exits so the main task can start.
  task "await-es-master-0-comm" {
    driver = "docker"

    config {
      image        = "busybox:1.28"
      command      = "sh"
      args         = ["-c", "echo -n 'Waiting for service'; until nslookup es-test-master-0.service.consul 2>&1 >/dev/null; do echo '.'; sleep 2; done"]
      network_mode = "host"
    }

    resources {
      cpu    = 200
      memory = 128
    }

    lifecycle {
      hook    = "prestart"
      sidecar = false
    }
  }

  task "elasticsearch" {
    driver       = "docker"
    kill_timeout = "300s"
    kill_signal  = "SIGTERM"

    # path.data / path.logs in elasticsearch.yml point below this mount.
    volume_mount {
      volume      = "ceph-volume"
      destination = "/srv"
      read_only   = false
    }

    env {
      #ES_TMPDIR = "/usr/share/elasticsearch/temp"
      LOG4J_FORMAT_MSG_NO_LOOKUPS = true
    }

    # elasticsearch.yml, rendered into the task's local/ directory and
    # bind-mounted over the image's config file by config.volumes below.
    # The heredoc body starts at column 0 on purpose: "<<EOF" keeps leading
    # whitespace, and the YAML nesting is significant.
    template {
      data = <<EOF
cluster:
  name: {{ env "NOMAD_JOB_NAME" }}
  publish:
    timeout: 300s
  join:
    timeout: 300s
  initial_master_nodes:
    - {{ range service "es-test-master-0" }}{{ .Address }}:{{ .Port }}{{ end }}
node:
  name: {{ env "NOMAD_JOB_NAME" }}-{{ env "NOMAD_GROUP_NAME" }}
  master: false
  data: true
  ingest: true
network:
  host: 0.0.0.0
discovery:
  seed_hosts:
    - es-test-master-0.service.consul:19203
    - es-test-master-1.service.consul:19203
    - es-test-master-2.service.consul:19203
  zen:
    ping_timeout: 120s
    fd:
      ping_interval: 120s
      ping_timeout: 120s
      ping_retries: 5
path:
  data:
    - /srv/es-test_data-1/data
  logs: /srv/es-test_data-1/log
bootstrap.memory_lock: true
indices.query.bool.max_clause_count: 10000
EOF

      destination = "local/elasticsearch.yml"
    }

    # jvm.options, rendered next to elasticsearch.yml.
    # NOTE(review): ${ES_TMPDIR} is kept verbatim although the env entry above
    # is commented out - presumably it is resolved by the Elasticsearch
    # launcher inside the image; confirm.
    # The ErrorFile line uses the JVM's %p (process id) placeholder; the
    # previous text "%!p(MISSING)" was a printf-mangled copy of it.
    template {
      data = <<EOF
-Xms10g
-Xmx10g
8-13:-XX:+UseConcMarkSweepGC
8-13:-XX:CMSInitiatingOccupancyFraction=75
8-13:-XX:+UseCMSInitiatingOccupancyOnly
14-:-XX:+UseG1GC
-Djava.io.tmpdir=${ES_TMPDIR}
-XX:+HeapDumpOnOutOfMemoryError
-XX:HeapDumpPath=data
-XX:ErrorFile=logs/hs_err_pid%p.log
8:-XX:+PrintGCDetails
8:-XX:+PrintGCDateStamps
8:-XX:+PrintTenuringDistribution
8:-XX:+PrintGCApplicationStoppedTime
8:-Xloggc:logs/gc.log
8:-XX:+UseGCLogFileRotation
8:-XX:NumberOfGCLogFiles=32
8:-XX:GCLogFileSize=64m
9-:-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m
EOF

      destination = "local/jvm.options"
    }

    config {
      image      = "elasticsearch:7.8.1"
      force_pull = false

      # Replace the image's stock config files with the rendered templates.
      volumes = [
        "./local/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml",
        "./local/jvm.options:/usr/share/elasticsearch/config/jvm.options"
      ]

      command = "bin/elasticsearch"

      # Publish the Nomad-assigned address and ports so other nodes and
      # clients reach this node through the host mapping.
      args = [
        "-Enetwork.publish_host=${NOMAD_IP_request}",
        "-Ehttp.publish_port=${NOMAD_HOST_PORT_request}",
        "-Ehttp.port=${NOMAD_PORT_request}",
        "-Etransport.publish_port=${NOMAD_HOST_PORT_communication}",
        "-Etransport.tcp.port=${NOMAD_PORT_communication}"
      ]

      ports = [
        "request",
        "communication"
      ]

      # memlock = -1 accompanies bootstrap.memory_lock: true in elasticsearch.yml.
      ulimit {
        memlock = "-1"
        nofile  = "65536"
        nproc   = "65536"
      }
    }

    resources {
      cpu    = 25600
      memory = 16484
    }

    # HTTP endpoint, registered under the job name.
    service {
      name = "${NOMAD_JOB_NAME}"
      port = "request"

      check {
        name     = "rest-tcp"
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }

      check {
        name     = "rest-http"
        type     = "http"
        path     = "/"
        interval = "5s"
        timeout  = "4s"
      }
    }

    # Transport endpoint; the same service name is used by every data group.
    service {
      name = "es-data-comm"
      port = "communication"

      check {
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }
    }
  }
}
############### kibana ##########
group "kibana-server" {
  # Single Kibana instance that talks to the Elasticsearch data nodes of this
  # job over HTTP.
  #
  # The data nodes are addressed through the DNS name of their "es-data-comm"
  # service rather than "es-test": the "es-test" service is also registered by
  # the master groups, whose HTTP port is 19202, so "es-test.service.consul"
  # can resolve to a host where nothing listens on 29202. Only the data groups
  # register "es-data-comm", and their HTTP port is the static 29202.
  count = 1

  restart {
    attempts = 3
    delay    = "30s"
    interval = "5m"
    mode     = "fail"
  }

  network {
    dns {
      servers = ["10.103.1.11"]
    }

    port "http" {
      static = 5601
    }
  }

  # volume "ceph-volume" {
  #   type            = "csi"
  #   read_only       = false
  #   source          = "es-kibana"
  #   access_mode     = "single-node-writer"
  #   attachment_mode = "file-system"
  # }

  # Prestart (non-sidecar) task: loops until at least one data node is
  # resolvable through Consul DNS, then exits so Kibana can start.
  task "await-es-req" {
    driver = "docker"

    config {
      image        = "busybox:1.28"
      command      = "sh"
      args         = ["-c", "echo -n 'Waiting for service'; until nslookup es-data-comm.service.consul 2>&1 >/dev/null; do echo '.'; sleep 2; done"]
      network_mode = "host"
    }

    resources {
      cpu    = 200
      memory = 128
    }

    lifecycle {
      hook    = "prestart"
      sidecar = false
    }
  }

  task "kibana" {
    driver       = "docker"
    kill_timeout = "300s"
    kill_signal  = "SIGTERM"

    # volume_mount {
    #   volume      = "ceph-volume"
    #   destination = "/srv"
    #   read_only   = false
    # }

    # kibana.yml, rendered into the task's local/ directory and bind-mounted
    # into the container by config.volumes below. The heredoc body starts at
    # column 0 because "<<EOF" keeps leading whitespace.
    template {
      data = <<EOF
server.host: "0.0.0.0"
server.maxPayloadBytes: 2097152
elasticsearch.requestTimeout: 60000
elasticsearch.hosts:
  - http://es-data-comm.service.consul:29202
EOF

      destination = "local/kibana.yml"
    }

    config {
      image      = "x602/kibana:7.8.1"
      force_pull = false

      volumes = [
        "./local/kibana.yml:/opt/kibana/config/kibana.yml",
      ]

      command = "bin/kibana"

      args = [
        "--host",
        "0.0.0.0",
        "--port",
        "${NOMAD_PORT_http}"
      ]

      ports = [
        "http"
      ]

      ulimit {
        memlock = "-1"
        nofile  = "65536"
        nproc   = "65536"
      }
    }

    resources {
      cpu    = 100
      memory = 1024
    }

    service {
      name = "es-kibana-http"
      port = "http"

      check {
        name     = "http-tcp"
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }

      check {
        name     = "http-http"
        type     = "http"
        path     = "/"
        interval = "5s"
        timeout  = "4s"
      }
    }
  }
}
}