Hi,
I have a problem mounting a volume using the Ceph CSI plugin.
The version info of Ceph and Nomad is:
[opc@minio-0 ~]$ sudo ceph --version
ceph version 14.2.20 (36274af6eb7f2a5055f2d53ad448f2694e9046a0) nautilus (stable)
[opc@minio-0 ~]$ nomad -version
Nomad v1.0.4 (9294f35f9aa8dbb4acb6e85fa88e3e2534a3e41a)
First, I ran the Ceph CSI controller and node jobs as follows.
## create a file, ceph-csi-plugin-controller.nomad.
cat <<EOC > ceph-csi-plugin-controller.nomad
job "ceph-csi-plugin-controller" {
datacenters = ["dc1"]
group "controller" {
network {
port "metrics" {}
}
task "ceph-controller" {
template {
data = <<EOF
[{
"clusterID": "c628ebf1-d03f-4806-9941-8b5840338b14",
"monitors": [
"v1:10.0.0.3:6789",
"v1:10.0.0.4:6789",
"v1:10.0.0.5:6789",
"v2:10.0.0.3:3300",
"v2:10.0.0.4:3300",
"v2:10.0.0.5:3300"
]
}]
EOF
destination = "local/config.json"
change_mode = "restart"
}
driver = "docker"
config {
image = "quay.io/cephcsi/cephcsi:v3.3.1"
volumes = [
"./local/config.json:/etc/ceph-csi-config/config.json"
]
args = [
"--type=rbd",
"--controllerserver=true",
"--drivername=rbd.csi.ceph.com",
"--endpoint=unix://csi/csi.sock",
"--nodeid=\${node.unique.name}",
"--instanceid=\${node.unique.name}-controller",
"--pidlimit=-1",
"--v=5",
"--metricsport=\$\${NOMAD_PORT_metrics}"
]
}
resources {
cpu = 500
memory = 256
}
service {
name = "ceph-csi-controller"
port = "metrics"
tags = [ "prometheus" ]
}
csi_plugin {
id = "ceph-csi"
type = "controller"
mount_dir = "/csi"
}
}
}
}
EOC
## create a file, ceph-csi-plugin-nodes.nomad.
cat <<EOC > ceph-csi-plugin-nodes.nomad
job "ceph-csi-plugin-nodes" {
datacenters = ["dc1"]
type = "system"
group "nodes" {
network {
port "metrics" {}
}
task "ceph-node" {
driver = "docker"
template {
data = <<EOF
[{
"clusterID": "c628ebf1-d03f-4806-9941-8b5840338b14",
"monitors": [
"v1:10.0.0.3:6789",
"v1:10.0.0.4:6789",
"v1:10.0.0.5:6789",
"v2:10.0.0.3:3300",
"v2:10.0.0.4:3300",
"v2:10.0.0.5:3300"
]
}]
EOF
destination = "local/config.json"
change_mode = "restart"
}
config {
image = "quay.io/cephcsi/cephcsi:v3.3.1"
volumes = [
"./local/config.json:/etc/ceph-csi-config/config.json"
]
mounts = [
{
type = "tmpfs"
target = "/tmp/csi/keys"
readonly = false
tmpfs_options = {
size = 1000000 # size in bytes
}
}
]
args = [
"--type=rbd",
"--drivername=rbd.csi.ceph.com",
"--nodeserver=true",
"--endpoint=unix://csi/csi.sock",
"--nodeid=\${node.unique.name}",
"--instanceid=\${node.unique.name}-nodes",
"--pidlimit=-1",
"--v=5",
"--metricsport=\$\${NOMAD_PORT_metrics}"
]
privileged = true
}
resources {
cpu = 500
memory = 256
}
service {
name = "ceph-csi-nodes"
port = "metrics"
tags = [ "prometheus" ]
}
csi_plugin {
id = "ceph-csi"
type = "node"
mount_dir = "/csi"
}
}
}
}
EOC
## stop jobs.
nomad stop ceph-csi-plugin-controller;
nomad stop ceph-csi-plugin-nodes;
## run the ceph csi plugin jobs.
nomad job run ceph-csi-plugin-controller.nomad;
nomad job run ceph-csi-plugin-nodes.nomad;
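For reference, once both jobs are running, the plugin health can be checked with a plain nomad plugin status call (output omitted here):
## check the csi plugin status.
nomad plugin status ceph-csi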
Then, I created a volume.
## create volume.
cat <<EOF > ceph-volume.hcl
type = "csi"
id = "ceph-mysql"
name = "ceph-mysql"
external_id = "0001-0024-c628ebf1-d03f-4806-9941-8b5840338b14-0000000000000009-00000000-1111-2222-bbbb-cacacacacaca"
access_mode = "single-node-writer"
attachment_mode = "block-device"
mount_options {}
plugin_id = "ceph-csi"
secrets {
  userID  = "admin"
  userKey = "AQBemoFg1vvcBBAAX81SSAlWEtoOWLubhnIyVA=="
}
context {
  clusterID     = "c628ebf1-d03f-4806-9941-8b5840338b14"
  pool          = "myPool"
  imageFeatures = "layering"
}
EOF
nomad volume deregister ceph-mysql;
nomad volume register ceph-volume.hcl;
After registering the volume, its status looks like this.
[opc@minio-0 ~]$ nomad volume status ceph-mysql
ID = ceph-mysql
Name = ceph-mysql
External ID = 0001-0024-c628ebf1-d03f-4806-9941-8b5840338b14-0000000000000009-00000000-1111-2222-bbbb-cacacacacaca
Plugin ID = ceph-csi
Provider = rbd.csi.ceph.com
Version = v3.3.1
Schedulable = true
Controllers Healthy = 0
Controllers Expected = 1
Nodes Healthy = 2
Nodes Expected = 2
Access Mode = single-node-writer
Attachment Mode = block-device
Mount Options = <none>
Namespace = default
Allocations
No allocations placed
Before registering the volume, I had already created a pool with an image in Ceph.
# Create a ceph pool:
sudo ceph osd pool create myPool 64 64
# Create a block device pool:
sudo rbd pool init myPool
# create image.
sudo rbd create myimage --size 4096 --pool myPool --image-feature layering;
# associate pool to application.
sudo ceph osd pool application enable myPool rbd;
Let's list the pools in Ceph.
[opc@minio-0 ~]$ sudo ceph osd lspools;
1 cephfs_data
2 cephfs_metadata
3 foo
4 bar
5 .rgw.root
6 default.rgw.control
7 default.rgw.meta
8 default.rgw.log
9 myPool
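The RBD images in the pool can also be listed with the standard rbd commands, as a quick check that myimage really exists in myPool:
## list images in the pool and show details of the image.
sudo rbd ls --pool myPool;
sudo rbd info myPool/myimage;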
Finally, I ran the MySQL server job.
## deploy mysql.
cat <<EOF > mysql-server.nomad
job "mysql-server4" {
datacenters = ["dc1"]
type = "service"
group "mysql-server" {
count = 1
volume "ceph-mysql" {
type = "csi"
read_only = false
source = "ceph-mysql"
}
network {
port "db" {
static = 3306
}
}
restart {
attempts = 10
interval = "5m"
delay = "25s"
mode = "delay"
}
task "mysql-server" {
driver = "docker"
volume_mount {
volume = "ceph-mysql"
destination = "/srv"
read_only = false
}
env {
MYSQL_ROOT_PASSWORD = "password"
}
config {
image = "hashicorp/mysql-portworx-demo:latest"
args = ["--datadir", "/srv/mysql"]
ports = ["db"]
}
resources {
cpu = 500
memory = 1024
}
service {
name = "mysql-server"
port = "db"
check {
type = "tcp"
interval = "10s"
timeout = "2s"
}
}
}
}
}
EOF
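The job file can optionally be checked with nomad job validate before running it (a generic sanity check, not specific to CSI):
# validate the job file (optional).
nomad job validate mysql-server.nomad;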
# run mysql server job.
nomad job run mysql-server.nomad;
But the job failed with the following errors.
[opc@minio-0 ~]$ nomad alloc status 559fd00d
ID = 559fd00d-92b8-84bf-7082-3c1fb6dcf3da
Eval ID = d4072aa6
Name = mysql-server4.mysql-server[0]
Node ID = dd258138
Node Name = nomad-client-1
Job ID = mysql-server4
Job Version = 0
Client Status = failed
Client Description = Failed tasks
Desired Status = run
Desired Description = <none>
Created = 1m36s ago
Modified = 1m34s ago
Deployment ID = 71762f2d
Deployment Health = unhealthy
Reschedule Eligibility = 24s from now
Allocation Addresses
Label Dynamic Address
*db yes 10.0.0.7:3306
Task "mysql-server" is "dead"
Task Resources
CPU Memory Disk Addresses
500 MHz 1.0 GiB 300 MiB
CSI Volumes:
ID Read Only
ceph-mysql false
Task Events:
Started At = N/A
Finished At = 2021-04-23T14:30:21Z
Total Restarts = 0
Last Restart = N/A
Recent Events:
Time Type Description
2021-04-23T14:30:21Z Setup Failure failed to setup alloc: pre-run hook "csi_hook" failed: node plugin returned an internal error, check the plugin allocation logs for more information: rpc error: code = Internal desc = image not found: RBD image not found
2021-04-23T14:30:21Z Received Task received by client
I have no idea what "image not found: RBD image not found" means.
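The message suggests checking the plugin allocation logs; those can be pulled with something like the following, where <node-plugin-alloc-id> is a placeholder for the ID of the ceph-csi-plugin-nodes allocation running on nomad-client-1:
## find the node plugin allocation and dump its stderr logs.
nomad job status ceph-csi-plugin-nodes;
nomad alloc logs -stderr <node-plugin-alloc-id> ceph-node;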
I have used Ceph CSI v3.3.1 for this example, but I have already tried other versions of Ceph CSI without success.
On Kubernetes, I have succeeded with dynamic volume provisioning using Ceph CSI v3.3.1 against an external Ceph 14.x cluster.
Has anybody used Ceph CSI with Ceph 14.x (Nautilus) on Nomad without problems?