Here’s what I’ve made
locals {
config = yamldecode(file("mongo.yml"))
}
job "mongo-rs" {
datacenters = ["dc1"]
# Spread the tasks across different hosts
constraint {
operator = "distinct_hosts"
value = "true"
}
update {
max_parallel = 1
min_healthy_time = "20s"
}
# Since we want our tasks spread across several Nomad nodes,
# we define one group per MongoDB instance
group "mongo-1" {
volume "mongo1" {
type = "csi"
source = "mongo1"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
# The Consul service mesh requires bridge mode
# Note: we don't map any port, all communications will go through the service mesh
network {
mode = "bridge"
}
# We define a mongo-master service that points at the current primary. That way clients no longer have to worry about it
# and can talk to the cluster as if it were a single server. Consul takes care of routing them to the right instance
service {
name = "mongo-master"
port = local.config.mongo.1.port
connect {
sidecar_service {}
sidecar_task {
resources {
memory = local.config.sidecar_proxy.memory
memory_max = local.config.sidecar_proxy.memory_max
cpu = local.config.sidecar_proxy.cpu
}
}
}
check {
type = "script"
command = "sh"
args = [
"-c",
"if [ \"$(mongo --quiet --port=${local.config.mongo.1.port} --eval 'db.isMaster().ismaster')\" = \"true\" ]; then exit 0; else exit 2; fi"
]
interval = "10s"
timeout = "5s"
task = "mongo-1"
on_update = "ignore"
}
}
# First MongoDB instance
service {
name = "mongo-1"
# This instance uses port 27017
port = local.config.mongo.1.port
# Service mesh configuration
connect {
sidecar_service {
proxy {
# We define two upstreams: one for mongo-2 and one for mongo-3
# From mongo-1's point of view, all the instances are therefore reachable on 127.0.0.1
# on ports 27017, 27018 and 27019 (it will be the same for the other MongoDB instances)
upstreams {
destination_name = "mongo-2"
local_bind_port = local.config.mongo.2.port
}
upstreams {
destination_name = "mongo-3"
local_bind_port = local.config.mongo.3.port
}
}
}
# We can set resources for the Envoy proxy used by the service mesh
sidecar_task {
resources {
memory = local.config.sidecar_proxy.memory
memory_max = local.config.sidecar_proxy.memory_max
cpu = local.config.sidecar_proxy.cpu
}
}
}
# Health check to verify the MongoDB instance is working properly
check {
type = "script"
command = "sh"
args = [ "-c", "mongo --port=${local.config.mongo.1.port} --eval \"db.stats()\"" ]
interval = "10s"
timeout = "5s"
task = "mongo-1"
}
}
# We define a task that initializes the replica set when mongo-1 starts
task "mongo-setup" {
driver = "docker"
lifecycle {
# This task runs after the main task (mongo-1) has started
hook = "poststart"
}
# We create a file containing the mongo commands that initialize the replica set
template {
data = <<-EOF
rsconf = {
_id : "rs0",
members: [
{
"_id": 0,
"host": "127.0.0.1:${local.config.mongo.1.port}",
"priority": 3
},
{
"_id": 1,
"host": "127.0.0.1:${local.config.mongo.2.port}",
"priority": 2
},
{
"_id": 2,
"host": "127.0.0.1:${local.config.mongo.3.port}",
"priority": 1
}
]
};
rs.initiate(rsconf);
EOF
# This file is created as local/initrs.js, which will be accessible inside the container at /local/initrs.js
destination = "local/initrs.js"
}
# We use the mongo image, which contains the mongo client, but we override the command:
# here we don't start a mongodb server, we just run the mongo client with the initrs.js file as a parameter.
# We wait 30s to improve the odds that all the MongoDB instances are up (testing ports
# 27017, 27018 and 27019 would be better; see the sketch after the config block below)
config {
image = local.config.image
command = "bash"
args = [
"-c",
"sleep 30 && mongo --port=${local.config.mongo.1.port} --verbose /local/initrs.js"
]
}
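# Untested sketch: instead of the fixed sleep above, the command could wait for each
# member's port before initiating, e.g. with bash's /dev/tcp:
# "for p in 27017 27018 27019; do until (echo > /dev/tcp/127.0.0.1/$p) 2>/dev/null; do sleep 1; done; done && mongo --port=27017 --verbose /local/initrs.js"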
# Since this task only runs at initialization, we don't want to reserve too many resources for it,
# so we can lower its allocation
resources {
cpu = 50
memory = 50
memory_max = 200
}
}
# The main MongoDB task
task "mongo-1" {
driver = "docker"
config {
image = local.config.image
args = [
"--port=${local.config.mongo.1.port}",
"--replSet=rs0"
]
}
# Resources reserved for MongoDB
resources {
memory = local.config.resources.memory
memory_max = local.config.resources.memory_max
cpu = local.config.resources.cpu
}
volume_mount {
volume = "mongo1"
destination = "/data/db"
}
}
}
# For mongo-2 and mongo-3 we do the same thing (adapting the sidecar_service upstreams), but these two
# don't need a mongo-setup task
group "mongo-2" {
network {
mode = "bridge"
}
volume "mongo2" {
type = "csi"
source = "mongo2"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
service {
name = "mongo-master"
port = local.config.mongo.2.port
connect {
sidecar_service {}
sidecar_task {
resources {
memory = local.config.sidecar_proxy.memory
memory_max = local.config.sidecar_proxy.memory_max
cpu = local.config.sidecar_proxy.cpu
}
}
}
check {
type = "script"
command = "bash"
args = [
"-c",
"if [ \"$(mongo --quiet --port=${local.config.mongo.2.port} --eval 'db.isMaster().ismaster')\" = \"true\" ]; then exit 0; else exit 2; fi"
]
interval = "10s"
timeout = "5s"
task = "mongo-2"
on_update = "ignore"
}
}
service {
name = "mongo-2"
port = local.config.mongo.2.port
connect {
sidecar_service {
proxy {
upstreams {
destination_name = "mongo-1"
local_bind_port = local.config.mongo.1.port
}
upstreams {
destination_name = "mongo-3"
local_bind_port = local.config.mongo.3.port
}
}
}
sidecar_task {
resources {
memory = local.config.sidecar_proxy.memory
memory_max = local.config.sidecar_proxy.memory_max
cpu = local.config.sidecar_proxy.cpu
}
}
}
check {
type = "script"
command = "sh"
args = [ "-c", "mongo --port=${local.config.mongo.2.port} --eval \"db.stats()\"" ]
interval = "10s"
timeout = "5s"
task = "mongo-2"
}
}
task "mongo-2" {
driver = "docker"
config {
image = local.config.image
args = [
"--port=${local.config.mongo.2.port}",
"--replSet=rs0"
]
}
resources {
memory = local.config.resources.memory
memory_max = local.config.resources.memory_max
cpu = local.config.resources.cpu
}
volume_mount {
volume = "mongo2"
destination = "/data/db"
}
}
}
group "mongo-3" {
network {
mode = "bridge"
}
volume "mongo3" {
type = "csi"
source = "mongo3"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
service {
name = "mongo-master"
port = local.config.mongo.3.port
connect {
sidecar_service {}
sidecar_task {
resources {
memory = local.config.sidecar_proxy.memory
memory_max = local.config.sidecar_proxy.memory_max
cpu = local.config.sidecar_proxy.cpu
}
}
}
check {
type = "script"
command = "bash"
args = [
"-c",
"if [ \"$(mongo --quiet --port=${local.config.mongo.3.port} --eval 'db.isMaster().ismaster')\" = \"true\" ]; then exit 0; else exit 2; fi"
]
interval = "10s"
timeout = "5s"
task = "mongo-3"
on_update = "ignore"
}
}
service {
name = "mongo-3"
port = local.config.mongo.3.port
connect {
sidecar_service {
proxy {
upstreams {
destination_name = "mongo-1"
local_bind_port = local.config.mongo.1.port
}
upstreams {
destination_name = "mongo-2"
local_bind_port = local.config.mongo.2.port
}
}
}
sidecar_task {
resources {
memory = local.config.sidecar_proxy.memory
memory_max = local.config.sidecar_proxy.memory_max
cpu = local.config.sidecar_proxy.cpu
}
}
}
check {
type = "script"
command = "sh"
args = [ "-c", "mongo --port=${local.config.mongo.3.port} --eval \"db.stats()\"" ]
interval = "10s"
timeout = "5s"
task = "mongo-3"
}
}
task "mongo-3" {
driver = "docker"
config {
image = local.config.image
args = [
"--port=${local.config.mongo.3.port}",
"--replSet=rs0"
]
}
resources {
memory = local.config.resources.memory
memory_max = local.config.resources.memory_max
cpu = local.config.resources.cpu
}
volume_mount {
volume = "mongo3"
destination = "/data/db"
}
}
}
}
With a mongo.yml config file like this
resources:
  cpu: 200
  memory: 300
  memory_max: 1024
sidecar_proxy:
  cpu: 100
  memory: 80
  memory_max: 120
image: mongo:4.4.15-focal
mongo:
  1:
    port: 27017
  2:
    port: 27018
  3:
    port: 27019
And an ingress gateway like this
job "ingress-mongo" {
datacenters = ["dc1"]
spread {
attribute = "${node.unique.id}"
}
# We define an ingress gateway, which makes the mongo cluster (the master) reachable
# from outside the service mesh
group "ingress-mongo-master" {
count = 2
network {
mode = "bridge"
# The ingress gateway will listen on a dynamic port of the Nomad node it runs on
port "mongo-master-port" {
to = 27017
}
}
service {
name = "ingress-mongo-master"
port = "mongo-master-port"
connect {
gateway {
ingress {
listener {
port = 27017
service {
# We connect the ingress gateway to mongo-master
# so we always land on the server that accepts writes
name = "mongo-master"
}
}
}
}
}
# This ingress gateway can be exposed through Traefik (or you can connect to it directly)
tags = [
"traefik.enable=true",
"traefik.tcp.routers.mongo.rule=HostSNI(`mongo-rs.example.org`)",
"traefik.tcp.routers.mongo.tls=true",
"traefik.tcp.routers.mongo.tls.certresolver=le",
"traefik.consulcatalog.connect=false"
]
}
}
}
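With the Traefik tags above, external clients can then reach the current primary with an ordinary connection string, for example (assuming Traefik terminates TLS on a TCP entrypoint listening on 443, which is hypothetical here):
mongo "mongodb://mongo-rs.example.org:443/?tls=true"
Or they can skip Traefik and connect directly to one of the gateway's dynamic host ports.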
This will set up a replica set using the Consul service mesh for everything. You’ll need to add the appropriate intentions in Consul (mongo-1 → mongo-2, mongo-1 → mongo-3, mongo-2 → mongo-1, mongo-2 → mongo-3, mongo-3 → mongo-1, mongo-3 → mongo-2, ingress-mongo-master → mongo-master), for example with config entries like the ones sketched below.
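If you manage intentions as config entries, the allow rules take one service-intentions entry per destination service (hypothetical file layout, each applied with consul config write <file>), roughly:
Kind = "service-intentions"
Name = "mongo-1"
Sources = [
  {
    Name   = "mongo-2"
    Action = "allow"
  },
  {
    Name   = "mongo-3"
    Action = "allow"
  }
]
The entries for mongo-2 and mongo-3 are symmetrical, and mongo-master only needs to allow the gateway:
Kind = "service-intentions"
Name = "mongo-master"
Sources = [
  {
    Name   = "ingress-mongo-master"
    Action = "allow"
  }
]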
But there are still a few issues, so this is not really production-ready:
- All the mongo members are defined in separate groups so they can be spread across different nodes. But the update policy applies within each group, not across the whole job, so when you update the job, all your mongo components can restart at the same time
- I rely on a mongo-master “virtual” service which detects the current mongo leader. But this implies mongo-master will always appear in a critical state (because 2 of the 3 instances, not being the current leader, will be critical). This should be replaced with a script that updates service tags, together with a service-resolver using subsets instead (see the sketch below)
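For the second point, the subset-based alternative could look roughly like this. It is only a sketch: it assumes the three members are registered under a single service name (mongo here) and that an external script keeps a primary tag on whichever member is currently the leader, neither of which the job above does yet:
Kind          = "service-resolver"
Name          = "mongo"
DefaultSubset = "primary"
Subsets = {
  primary = {
    Filter = "primary in Service.Tags"
  }
}
Routing would then follow the tag instead of health status, so the permanently critical mongo-master checks could go away.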