Failed to download artifact : getter subprocess failed: exit status 1

Hello, this is Spanner. I hope someone can help me figure out this issue. I have run into this error many times while trying to download the Hadoop binary. I have tried different Hadoop versions and different download links, but I always get the same result. Below is the job file that I wrote:

variables {
  hadoop_version = "3.3.2"
  node1 = "nomad37"
  node2 = "nomad78"
  node3 = "nomad79"

  // core-site.xml
  hdfs_cluster_name = "dmpcluster"
  dfs_permission = "false"
  hadoop_tmp_dir = "local/data/hadoop/tmp/hadoop"
  journal_edit_dir = "local/data/journal/tmp/journal"

  // yarn-site.xml
  ha_status = "true"
  yarn_cluster_name = "dmpcluster"
  // yarn-site spec1
  yarn_scheduler_mem = "47104"
  yarn_scheduler_cpu = "24"
  yarn_node_mem = "47104"
  yarn_node_cpu = "24"
  pmem_check = "false"
  vmem_check = "false"
}

job "hadoop-test" {
    datacenters = ["dc1"]
    type = "service"
    update {
      max_parallel      = 3
      health_check      = "checks"
      min_healthy_time  = "10s"
      healthy_deadline  = "15m"
      progress_deadline = "20m"
      auto_revert       = true
      auto_promote      = true
      canary            = 1
      stagger           = "30s"
    }
   
    group "hadoop-test" {
        count = 1

        restart {
            attempts = 3
            interval = "3m"
            delay = "10s"
            mode = "fail"
        }

        affinity {
            attribute  = "${node.unique.name}"
            value     = "nomad37"
            weight    = 70
        }

        task "hd1" {
            driver = "exec"

            artifact {
                source = "https://dlcdn.apache.org/hadoop/common/hadoop-${var.hadoop_version}/hadoop-${var.hadoop_version}.tar.gz"
                destination = "local"
            }

            template {
                destination = "local/hadoop-${var.hadoop_version}/etc/hadoop/core-site.xml"
                change_mode = "restart"
                data = <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://${var.node1}:9000</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>${var.dfs_permission}</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>${var.hadoop_tmp_dir}</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>${var.journal_edit_dir}</value>
  </property>
</configuration>
EOF
            }

            template {
                destination = "local/hadoop-${var.hadoop_version}/etc/hadoop/yarn-site.xml"
                change_mode = "restart"
                data = <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>${var.ha_status}</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>${var.yarn_cluster_name}</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>${var.node1}</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>${var.node1}:8088</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>${var.yarn_scheduler_mem}</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>${var.yarn_scheduler_cpu}</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>${var.yarn_node_mem}</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>${var.yarn_node_cpu}</value>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>${var.pmem_check}</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>${var.vmem_check}</value>
  </property>
</configuration>
EOF
            }

            template {
                destination = "local/hadoop-${var.hadoop_version}/etc/hadoop/hdfs-site.xml"
                change_mode = "restart"
                data = <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>${var.node1}:9000</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>alloc/usr/local/hadoop/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>alloc/usr/local/hadoop/dfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
EOF
            }

            template {
                destination = "local/hadoop-${var.hadoop_version}/etc/hadoop/mapred-site.xml"
                change_mode = "restart"
                data = <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
EOF
            }

            config {
                command = "local/hadoop-${var.hadoop_version}/bin/hdfs"
                args = [
                    "namenode",
                    "-format"
                ]
            }

            resources {
                cpu = 600
                memory = 4096
            }
        }
    }
}

What is the version of the Nomad agent?

How many files are inside hadoop-${var.hadoop_version}.tar.gz?

Recent Nomad versions impose a limit on how many files can be extracted by default.

There is now an agent-side setting to override it.
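For reference, the override lives in the client agent's configuration file, not in the job spec. A rough sketch of what I mean (attribute names as I understand the client "artifact" settings in recent Nomad releases; please double-check against the docs for your version):

client {
  artifact {
    # Maximum number of files an artifact archive may contain before
    # extraction is refused; 0 should disable the check entirely.
    decompression_file_count_limit = 0

    # Maximum total size an archive is allowed to decompress to.
    decompression_size_limit = "100GB"

    # Read timeout for HTTP downloads; worth raising if the download is slow.
    http_read_timeout = "30m"
  }
}

The client agent will most likely need a restart to pick this up.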


Hello @shantanugadgil, many thanks for your reply.
I’m using Nomad v1.6.1.
I have set “decompression_file_count_limit” to “0” so that no limit is enforced, but I still get the same result.

psychic :crystal_ball: debugging …

  • Does nomad alloc logs -stderr say anything extra?
  • What user is the Nomad agent running as? Is it a permission issue inside the Nomad agent’s data_dir?
  • I assume you have SSHed into the node and verified that wget/curl actually work on the command line.
  • If the download is slow, can you increase the deadline timeouts? (Just a thought.)

Hello @shantanugadgil. Thanks for helping me debug.

  1. It didn’t show anything extra.
  2. The user is “nomad”.
  3. Yes, I’ve confirmed that I can download the file on that machine via wget/curl on the command line.
  4. I have tried using the update stanza to raise healthy_deadline to 20m, but it didn’t help.

I noticed this error might come from a “tar” extraction issue, because I have retried the install many times and every folder ends up in the alloc directory except the “sbin” folder. I’m wondering whether there is a permission problem related to that folder. I have also opened an issue for this at “hashicorp/go-getter”: Failed to download artifact : getter subprocess failed: exit status 1 · Issue #447 · hashicorp/go-getter · GitHub
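To narrow down whether the extraction step itself is at fault, one thing I may try next is to fetch the tarball without letting go-getter unpack it and untar it from the task instead, so any tar error shows up in the task’s own logs. A rough sketch only (this assumes the go-getter archive=false query parameter is honoured for HTTP sources and that /bin/sh and tar are available inside the exec sandbox):

            artifact {
                # archive=false should tell go-getter to keep the .tar.gz as-is
                # instead of extracting it during artifact download.
                source      = "https://dlcdn.apache.org/hadoop/common/hadoop-${var.hadoop_version}/hadoop-${var.hadoop_version}.tar.gz?archive=false"
                destination = "local/hadoop-${var.hadoop_version}.tar.gz"
                mode        = "file"
            }

            config {
                command = "/bin/sh"
                args = [
                    "-c",
                    "tar -xzf local/hadoop-${var.hadoop_version}.tar.gz -C local && local/hadoop-${var.hadoop_version}/bin/hdfs namenode -format"
                ]
            }

If the manual tar fails on the same sbin files, that would point at a filesystem or permission problem in the alloc directory rather than at go-getter itself.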