Installing Boundary with Nomad

Hello,

I am trying to install Boundary via Nomad and am running into a few issues.

This is my nomad config:

# Nomad job that runs one Boundary instance (controller + worker in the same
# process) as a Docker container on the host network. The Boundary config file
# is rendered from the template below, with secrets read from Vault, and is
# bind-mounted into the container at /boundary/config.hcl.
job "boundary-controller" {
  region      = "global"
  datacenters = ["dc1"]
  type        = "service"

  # If the allocation fails, keep trying to place it again every 30s, forever.
  reschedule {
    delay          = "30s"
    delay_function = "constant"
    unlimited      = true
  }

  # Rolling-update policy: one allocation at a time, judged by its service
  # checks, rolling back automatically if the new version never gets healthy.
  update {
    max_parallel      = 1
    health_check      = "checks"
    min_healthy_time  = "10s"
    healthy_deadline  = "5m"
    progress_deadline = "10m"
    auto_revert       = true
    canary            = 0
    stagger           = "30s"
  }

  group "boundary-controller" {
    count = 1

    # Allow 2 in-place restarts per 10 minutes (15s apart); after that the
    # allocation is marked failed and the reschedule policy above takes over.
    restart {
      interval = "10m"
      attempts = 2
      delay    = "15s"
      mode     = "fail"
    }

    # Fixed host ports, one per Boundary listener purpose declared in the
    # template below (api, cluster, proxy).
    network {
      port "api" {
        static = 9200
      }

      port "cluster" {
        static = 9201
      }

      port "proxy" {
        static = 9202
      }
    }

    service {
      name = "boundary-controller"

      # Advertise the API port with the service. Without this the service is
      # registered with no port, so anything that discovers it through the
      # catalog (rather than hard-coding :9200) has nowhere to connect.
      port = "api"

      # Plain TCP connect against the API port every 10s.
      check {
        name     = "alive"
        type     = "tcp"
        port     = "api"
        interval = "10s"
        timeout  = "2s"
      }

      # Restart the task after 3 consecutive failed checks; checks are ignored
      # for the first 10s after the task (re)starts.
      check_restart {
        limit           = 3
        grace           = "10s"
        ignore_warnings = false
      }
    }

    task "boundary-controller" {
      driver = "docker"

      config {
        image        = "hashicorp/boundary:0.1.8"
        network_mode = "host"

        # Bind-mount the rendered template (see `destination` below) to the
        # config path used inside the container.
        volumes = [
          "local/boundary.hcl:/boundary/config.hcl",
          // "/mnt/nomad-shared/config/boundary:/etc/traefik/acme",
        ]
      }

      # Vault policy attached to the task's token; the `secret` lookups in the
      # template below are performed with it.
      vault {
        policies = ["nomad-cluster"]
      }

      # Boundary configuration file. The heredoc is rendered by Nomad's
      # template engine: {{env ...}} reads Nomad runtime variables and
      # {{with secret ...}} reads values from Vault.
      template {
        data = <<EOF
# Disable memory lock: https://www.man7.org/linux/man-pages/man2/mlock.2.html
disable_mlock = true

# Controller configuration block
controller {
  # This name attr must be unique across all controller instances if running in HA mode
  name = "{{env "NOMAD_ALLOC_ID"}}"

  # Database URL for postgres. This can be a direct "postgres://"
  # URL, or it can be "file://" to read the contents of a file to
  # supply the url, or "env://" to name an environment variable
  # that contains the URL.
  database {
      url = "postgresql://postgres:{{with secret "secret/data/postgres/creds/postgres"}}{{.Data.data.password}}{{end}}@master.postgres.service.consul:5432/boundary?sslmode=disable"
  }
}

worker {
  # Name attr must be unique across workers
  name = "{{env "NOMAD_ALLOC_ID"}}-worker"
  description = "A default worker created demonstration"

  # Workers must be able to reach controllers on :9202
#  controllers = [
#    "{{env "NOMAD_IP_cluster"}}",
#  ]

  public_addr = "boundary.<removed>"
}

# API listener configuration block
listener "tcp" {
  # Should be the address of the NIC that the controller server will be reached on
  address = "{{env "NOMAD_IP_cluster"}}"
  # The purpose of this listener block
  purpose = "api"

  tls_disable = true

  # Uncomment to enable CORS for the Admin UI. Be sure to set the allowed origin(s)
  # to appropriate values.
  cors_enabled = true
  cors_allowed_origins = ["boundary.<removed>", "boundary.corp.<removed>"]
}

# Data-plane listener configuration block (used for worker coordination)
listener "tcp" {
  # Should be the IP of the NIC that the worker will connect on
  address = "{{env "NOMAD_IP_cluster"}}"
  # The purpose of this listener
  purpose = "cluster"

  tls_disable = true
}

listener "tcp" {
    purpose = "proxy"
    tls_disable = true
    address = "{{env "NOMAD_IP_cluster"}}"
}

# Root KMS configuration block: this is the root key for Boundary
# Use a production KMS such as AWS KMS in production installs
kms "transit" {
  purpose            = "root"
  address            = "http://vault.service.consul:8200"
  token              = "{{with secret "secret/data/boundary/creds/token"}}{{.Data.data.token}}{{end}}"
  disable_renewal    = "false"

  // Key configuration
  key_name           = "boundary_root_key"
  mount_path         = "transit/"
}

# Worker authorization KMS
# Use a production KMS such as AWS KMS for production installs
# This key is the same key used in the worker configuration

kms "transit" {
  purpose            = "worker-auth"
  address            = "http://vault.service.consul:8200"
  token              = "{{with secret "secret/data/boundary/creds/token"}}{{.Data.data.token}}{{end}}"
  disable_renewal    = "false"

  // Key configuration
  key_name           = "boundary_global_worker-auth"
  mount_path         = "transit/"
}

# Recovery KMS block: configures the recovery key for Boundary
# Use a production KMS such as AWS KMS for production installs

kms "transit" {
  purpose            = "recovery"
  address            = "http://vault.service.consul:8200"
  token              = "{{with secret "secret/data/boundary/creds/token"}}{{.Data.data.token}}{{end}}"
  disable_renewal    = "false"

  // Key configuration
  key_name           = "boundary_global_recovery"
  mount_path         = "transit/"
}
EOF

        destination = "local/boundary.hcl"
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}

It seems to mostly be working, however, when I run the job, the container exits with an error that the database has not been initialized.

Per the instructions from the docker container (which should be included with the main install instructions?), I have run the following on my laptop:

docker run --network host -e 'BOUNDARY_POSTGRES_URL=postgresql://postgres:<removed>@master.postgres.service.consul:5432/boundary?sslmode=disable' -v "$(pwd)":/boundary/ hashicorp/boundary database init -config /boundary/config.hcl

This does initialize the database, and prints a similar output to what is printed in dev mode.

I then restart the job on the Nomad cluster, and it seems to work (it throws a couple of warnings about not being able to renew keys):

kms-recovery-transit: unable to renew token, disabling renewal: err="Error making API request.

URL: PUT http://vault.service.consul:8200/v1/auth/token/renew-self
Code: 400. Errors:

The problem is that once I load the admin UI (http://boundary-controller.service.consul:9200), I cannot log in with the information printed by the local database init on my local machine. I just get an error message that authentication failed.

I am not sure what I am missing to get this working.

This appears to have been a browser issue; opening the page in a private window or a different browser seems to have fixed the problem of not being able to log in.

I’m still slightly concerned about the token-renewal issue and the hard-to-find documentation, though.

Do you have these permissions for your periodic token/token role?

# Lets the token holding this policy renew itself, i.e. call
# PUT /v1/auth/token/renew-self.
path "auth/token/renew-self" {
  capabilities = ["update"]
}

When I did this with Nomad, I had the following end-to-end permissions in my policy:

# --- Token housekeeping -----------------------------------------------------

# Let the token carrying this policy renew itself.
path "auth/token/renew-self" {
  capabilities = ["update"]
}

# --- Transit keys -------------------------------------------------------------
# One encrypt/decrypt pair per transit key; "update" is the capability that
# both operations require.

# Key: boundary_root
path "transit/encrypt/boundary_root" {
  capabilities = ["update"]
}
path "transit/decrypt/boundary_root" {
  capabilities = ["update"]
}

# Key: boundary_worker_auth
path "transit/encrypt/boundary_worker_auth" {
  capabilities = ["update"]
}
path "transit/decrypt/boundary_worker_auth" {
  capabilities = ["update"]
}

# Key: boundary_recovery
path "transit/encrypt/boundary_recovery" {
  capabilities = ["update"]
}
path "transit/decrypt/boundary_recovery" {
  capabilities = ["update"]
}

# --- Certificates -------------------------------------------------------------

# Issue (or re-issue) a listener certificate from the "boundary" role on the
# intermediate_ca mount.
path "intermediate_ca/issue/boundary" {
  capabilities = ["update"]
}

I’m not sure if the policy I have set for that job in Nomad has those permissions; I’ll have to check.