Skip to content

Issues when running an MR job: "No valid local directories in property: mapred.local.dir", or the job runs locally #2

@tangzhankun

Description

@tangzhankun

Hi,
I am using hadoop-utils in my Spring web app to implement a wordcount big data example. I can read from and write to HDFS, but I cannot run the wordcount job on the YARN cluster. The code snippet is as follows:

AppConfiguration helper = Configurations.newInstanceFromEnv();
Configuration yarnConf = helper.getServiceConfig("yarn-instance").asHadoopConfiguration();
Job job = Job.getInstance(yarnConf);
job.setJarByClass(WordCount.class);
job.setJobName("wc"+name);
job.setMapperClass(WordCount.Map.class);
job.setReducerClass(WordCount.Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.waitForCompletion(true);

This code reports a "No valid local directories in property: mapred.local.dir" error. After I added the configuration below, the error disappeared, but the wordcount job only runs locally; it does not run on the remote YARN cluster.

yarnConf.set("mapred.local.dir","/tmp");

I also checked the app's VCAP_SERVICES environment variable; the yarn field is present and looks correct:

  "yarn": [
   {
    "credentials": {
     "HADOOP_CONFIG_KEY": {
      "dfs.blocksize": "134217728",
      "dfs.client.domain.socket.data.traffic": "false",
      "dfs.client.failover.proxy.provider.nameservice1": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
      "dfs.client.read.shortcircuit": "false",
      "dfs.client.read.shortcircuit.skip.checksum": "false",
      "dfs.client.use.datanode.hostname": "false",
      "dfs.datanode.hdfs-blocks-metadata.enabled": "true",
      "dfs.domain.socket.path": "/var/run/hdfs-sockets/dn",
      "dfs.encryption.key.provider.uri": "kms://http@cdh-master-0.node.trustedanalytics.consul:16000/kms",
      "dfs.ha.automatic-failover.enabled.nameservice1": "true",
      "dfs.ha.namenodes.nameservice1": "namenode14,namenode15",
      "dfs.namenode.acls.enabled": "false",
      "dfs.namenode.http-address.nameservice1.namenode14": "cdh-master-0.node.trustedanalytics.consul:50070",
      "dfs.namenode.http-address.nameservice1.namenode15": "cdh-master-1.node.trustedanalytics.consul:50070",
      "dfs.namenode.https-address.nameservice1.namenode14": "cdh-master-0.node.trustedanalytics.consul:50470",
      "dfs.namenode.https-address.nameservice1.namenode15": "cdh-master-1.node.trustedanalytics.consul:50470",
      "dfs.namenode.rpc-address.nameservice1.namenode14": "cdh-master-0.node.trustedanalytics.consul:8020",
      "dfs.namenode.rpc-address.nameservice1.namenode15": "cdh-master-1.node.trustedanalytics.consul:8020",
      "dfs.namenode.servicerpc-address.nameservice1.namenode14": "cdh-master-0.node.trustedanalytics.consul:8022",
      "dfs.namenode.servicerpc-address.nameservice1.namenode15": "cdh-master-1.node.trustedanalytics.consul:8022",
      "dfs.nameservices": "nameservice1",
      "dfs.replication": "3",
      "fs.defaultFS": "hdfs://nameservice1",
      "fs.permissions.umask-mode": "022",
      "ha.zookeeper.quorum": "cdh-master-0.node.trustedanalytics.consul:2181,cdh-master-1.node.trustedanalytics.consul:2181,cdh-master-2.node.trustedanalytics.consul:2181",
      "hadoop.proxyuser.HTTP.groups": "*",
      "hadoop.proxyuser.HTTP.hosts": "*",
      "hadoop.proxyuser.flume.groups": "*",
      "hadoop.proxyuser.flume.hosts": "*",
      "hadoop.proxyuser.hdfs.groups": "*",
      "hadoop.proxyuser.hdfs.hosts": "*",
      "hadoop.proxyuser.hive.groups": "*",
      "hadoop.proxyuser.hive.hosts": "*",
      "hadoop.proxyuser.httpfs.groups": "*",
      "hadoop.proxyuser.httpfs.hosts": "*",
      "hadoop.proxyuser.hue.groups": "*",
      "hadoop.proxyuser.hue.hosts": "*",
      "hadoop.proxyuser.mapred.groups": "*",
      "hadoop.proxyuser.mapred.hosts": "*",
      "hadoop.proxyuser.oozie.groups": "*",
      "hadoop.proxyuser.oozie.hosts": "*",
      "hadoop.proxyuser.vcap.groups": "*",
      "hadoop.proxyuser.vcap.hosts": "*",
      "hadoop.proxyuser.yarn.groups": "*",
      "hadoop.proxyuser.yarn.hosts": "*",
      "hadoop.rpc.protection": "authentication",
      "hadoop.security.auth_to_local": "DEFAULT",
      "hadoop.security.authentication": "simple",
      "hadoop.security.authorization": "false",
      "hadoop.security.group.mapping": "org.apache.hadoop.security.ShellBasedUnixGroupsMapping",
      "hadoop.security.instrumentation.requires.admin": "false",
      "hadoop.security.key.provider.path": "kms://http@cdh-master-0.node.trustedanalytics.consul:16000/kms",
      "hadoop.ssl.client.conf": "ssl-client.xml",
      "hadoop.ssl.enabled": "false",
      "hadoop.ssl.keystores.factory.class": "org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory",
      "hadoop.ssl.require.client.cert": "false",
      "hadoop.ssl.server.conf": "ssl-server.xml",
      "io.compression.codecs": "org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec",
      "io.file.buffer.size": "65536",
      "mapreduce.admin.user.env": "LD_LIBRARY_PATH=/lib/native:",
      "mapreduce.am.max-attempts": "2",
      "mapreduce.application.classpath": "/*,/lib/*,",
      "mapreduce.client.submit.file.replication": "10",
      "mapreduce.framework.name": "yarn",
      "mapreduce.job.counters.max": "120",
      "mapreduce.job.reduce.slowstart.completedmaps": "0.8",
      "mapreduce.job.reduces": "1",
      "mapreduce.job.split.metainfo.maxsize": "10000000",
      "mapreduce.job.ubertask.enable": "false",
      "mapreduce.jobhistory.address": "cdh-master-0.node.trustedanalytics.consul:10020",
      "mapreduce.jobhistory.admin.address": "cdh-master-0.node.trustedanalytics.consul:10033",
      "mapreduce.jobhistory.webapp.address": "cdh-master-0.node.trustedanalytics.consul:19888",
      "mapreduce.jobhistory.webapp.https.address": "cdh-master-0.node.trustedanalytics.consul:19890",
      "mapreduce.map.cpu.vcores": "1",
      "mapreduce.map.java.opts": "-Djava.net.preferIPv4Stack=true -Xmx825955249",
      "mapreduce.map.memory.mb": "1024",
      "mapreduce.map.output.compress": "true",
      "mapreduce.map.output.compress.codec": "org.apache.hadoop.io.compress.SnappyCodec",
      "mapreduce.map.sort.spill.percent": "0.8",
      "mapreduce.map.speculative": "false",
      "mapreduce.output.fileoutputformat.compress": "false",
      "mapreduce.output.fileoutputformat.compress.codec": "org.apache.hadoop.io.compress.DefaultCodec",
      "mapreduce.output.fileoutputformat.compress.type": "BLOCK",
      "mapreduce.reduce.cpu.vcores": "1",
      "mapreduce.reduce.java.opts": "-Djava.net.preferIPv4Stack=true -Xmx825955249",
      "mapreduce.reduce.memory.mb": "1024",
      "mapreduce.reduce.shuffle.parallelcopies": "10",
      "mapreduce.reduce.speculative": "false",
      "mapreduce.shuffle.max.connections": "80",
      "mapreduce.task.io.sort.factor": "64",
      "mapreduce.task.io.sort.mb": "256",
      "mapreduce.task.timeout": "600000",
      "net.topology.script.file.name": "/etc/hadoop/conf.cloudera.YARN/topology.py",
      "yarn.acl.enable": "true",
      "yarn.admin.acl": "*",
      "yarn.am.liveness-monitor.expiry-interval-ms": "600000",
      "yarn.app.mapreduce.am.admin.user.env": "LD_LIBRARY_PATH=/lib/native:",
      "yarn.app.mapreduce.am.command-opts": "-Djava.net.preferIPv4Stack=true -Xmx825955249",
      "yarn.app.mapreduce.am.resource.cpu-vcores": "1",
      "yarn.app.mapreduce.am.resource.mb": "1024",
      "yarn.app.mapreduce.am.staging-dir": "/user",
      "yarn.application.classpath": ",,/*,/lib/*,/*,/lib/*,/*,/lib/*",
      "yarn.client.failover-sleep-base-ms": "100",
      "yarn.client.failover-sleep-max-ms": "2000",
      "yarn.nm.liveness-monitor.expiry-interval-ms": "600000",
      "yarn.resourcemanager.address.rm32": "cdh-master-0.node.trustedanalytics.consul:8032",
      "yarn.resourcemanager.address.rm34": "cdh-master-1.node.trustedanalytics.consul:8032",
      "yarn.resourcemanager.admin.address.rm32": "cdh-master-0.node.trustedanalytics.consul:8033",
      "yarn.resourcemanager.admin.address.rm34": "cdh-master-1.node.trustedanalytics.consul:8033",
      "yarn.resourcemanager.admin.client.thread-count": "1",
      "yarn.resourcemanager.am.max-attempts": "2",
      "yarn.resourcemanager.amliveliness-monitor.interval-ms": "1000",
      "yarn.resourcemanager.client.thread-count": "50",
      "yarn.resourcemanager.cluster-id": "yarnRM",
      "yarn.resourcemanager.container.liveness-monitor.interval-ms": "600000",
      "yarn.resourcemanager.ha.automatic-failover.embedded": "true",
      "yarn.resourcemanager.ha.automatic-failover.enabled": "true",
      "yarn.resourcemanager.ha.enabled": "true",
      "yarn.resourcemanager.ha.rm-ids": "rm32,rm34",
      "yarn.resourcemanager.max-completed-applications": "10000",
      "yarn.resourcemanager.nm.liveness-monitor.interval-ms": "1000",
      "yarn.resourcemanager.recovery.enabled": "true",
      "yarn.resourcemanager.resource-tracker.address.rm32": "cdh-master-0.node.trustedanalytics.consul:8031",
      "yarn.resourcemanager.resource-tracker.address.rm34": "cdh-master-1.node.trustedanalytics.consul:8031",
      "yarn.resourcemanager.resource-tracker.client.thread-count": "50",
      "yarn.resourcemanager.scheduler.address.rm32": "cdh-master-0.node.trustedanalytics.consul:8030",
      "yarn.resourcemanager.scheduler.address.rm34": "cdh-master-1.node.trustedanalytics.consul:8030",
      "yarn.resourcemanager.scheduler.class": "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler",
      "yarn.resourcemanager.scheduler.client.thread-count": "50",
      "yarn.resourcemanager.store.class": "org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore",
      "yarn.resourcemanager.webapp.address.rm32": "cdh-master-0.node.trustedanalytics.consul:8088",
      "yarn.resourcemanager.webapp.address.rm34": "cdh-master-1.node.trustedanalytics.consul:8088",
      "yarn.resourcemanager.webapp.https.address.rm32": "cdh-master-0.node.trustedanalytics.consul:8090",
      "yarn.resourcemanager.webapp.https.address.rm34": "cdh-master-1.node.trustedanalytics.consul:8090",
      "yarn.resourcemanager.zk-address": "cdh-master-0.node.trustedanalytics.consul:2181,cdh-master-1.node.trustedanalytics.consul:2181,cdh-master-2.node.trustedanalytics.consul:2181",
      "yarn.scheduler.fair.assignmultiple": "false",
      "yarn.scheduler.fair.preemption": "false",
      "yarn.scheduler.fair.sizebasedweight": "false",
      "yarn.scheduler.fair.user-as-default-queue": "true",
      "yarn.scheduler.increment-allocation-mb": "512",
      "yarn.scheduler.increment-allocation-vcores": "1",
      "yarn.scheduler.maximum-allocation-mb": "65536",
      "yarn.scheduler.maximum-allocation-vcores": "32",
      "yarn.scheduler.minimum-allocation-mb": "1024",
      "yarn.scheduler.minimum-allocation-vcores": "1",
      "zlib.compress.level": "DEFAULT_COMPRESSION"
     },
     "kerberos": {
      "kdc": "",
      "krealm": ""
     }
    },
    "label": "yarn",
    "name": "yarn-instance",
    "plan": "shared",
    "tags": []
   }
  ]
 }
}

Can you help with this? Thanks in advance.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions