Hi, I am new to mrjob and Hadoop. After building my Hadoop cluster, I tried to use mrjob to submit a job to Hadoop,
but unfortunately it failed with the error "returned non-zero exit status 256". More details follow:
My environment:
hadoop:2.7.1
python:2.7.9
#!/usr/bin/python
"""Word-frequency count MRJob (the canonical mrjob example).

Maps each input line to (word, 1) pairs, then sums counts in the
combiner and reducer.
"""
from mrjob.job import MRJob

import re

# Matches runs of word characters, allowing embedded apostrophes (e.g. "don't").
WORD_RE = re.compile(r"[\w']+")


class MRWordFreqCount(MRJob):

    def mapper(self, _, line):
        # Emit one (lowercased word, 1) pair per word occurrence.
        for word in WORD_RE.findall(line):
            yield (word.lower(), 1)

    def combiner(self, word, counts):
        # Local pre-aggregation on each mapper node to cut shuffle volume.
        yield (word, sum(counts))

    def reducer(self, word, counts):
        # Final total for each word.
        yield (word, sum(counts))


if __name__ == '__main__':
    MRWordFreqCount.run()
# this is my example:
python test.py -r hadoop -v ./pg20417.txt > output
[root@master hadoop-2.7.1]# cat /etc/mrjob.conf
{
"runners":{
"hadoop":{
"interpreter": "/root/.pyenv/versions/2.7.9/bin/python",
"hadoop_bin": "/diskb/dxb/code/hadoop-2.7.1/bin/hadoop"
}
}
}
[root@master hadoop-2.7.1]# python test.py -r hadoop -v ./pg20417.txt > output
looking for configs in /root/.mrjob.conf
looking for configs in /etc/mrjob.conf
using configs in /etc/mrjob.conf
Active configuration:
{'base_tmp_dir': '/tmp',
'bootstrap_mrjob': True,
'check_input_paths': True,
'cleanup': ['ALL'],
'cleanup_on_failure': ['NONE'],
'cmdenv': {},
'hadoop_bin': ['/diskb/dxb/code/hadoop-2.7.1/bin/hadoop'],
'hadoop_extra_args': [],
'hadoop_home': '/diskb/dxb/code/hadoop-2.7.1',
'hadoop_streaming_jar': None,
'hadoop_version': '0.20',
'hdfs_scratch_dir': 'tmp/mrjob',
'interpreter': ['/root/.pyenv/versions/2.7.9/bin/python'],
'job_name': None,
'jobconf': {},
'label': None,
'owner': 'root',
'python_archives': [],
'python_bin': ['python'],
'setup': [],
'setup_cmds': [],
'setup_scripts': [],
'sh_bin': ['sh', '-ex'],
'steps_interpreter': ['/root/.pyenv/versions/2.7.9/bin/python'],
'steps_python_bin': ['/root/.pyenv/versions/2.7.9/bin/python'],
'strict_protocols': None,
'upload_archives': [],
'upload_files': []}
Looking for hadoop streaming jar in /diskb/dxb/code/hadoop-2.7.1
Hadoop streaming jar is /diskb/dxb/code/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-streaming-2.7.1.jar
creating tmp directory /tmp/test.root.20150727.031714.703094
archiving /root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob -> /tmp/test.root.20150727.031714.703094/mrjob.tar.gz as mrjob/
writing wrapper script to /tmp/test.root.20150727.031714.703094/setup-wrapper.sh
WRAPPER: # store $PWD
WRAPPER: __mrjob_PWD=$PWD
WRAPPER:
WRAPPER: # obtain exclusive file lock
WRAPPER: exec 9>/tmp/wrapper.lock.test.root.20150727.031714.703094
WRAPPER: python -c 'import fcntl; fcntl.flock(9, fcntl.LOCK_EX)'
WRAPPER:
WRAPPER: # setup commands
WRAPPER: {
WRAPPER: export PYTHONPATH=$__mrjob_PWD/mrjob.tar.gz:$PYTHONPATH
WRAPPER: } 0</dev/null 1>&2
WRAPPER:
WRAPPER: # release exclusive file lock
WRAPPER: exec 9>&-
WRAPPER:
WRAPPER: # run task from the original working directory
WRAPPER: cd $__mrjob_PWD
WRAPPER: "$@"
Making directory hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/ on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop version
Using Hadoop version 2.7.1
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -mkdir -p hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/
Copying local files into hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/
Uploading ./pg20417.txt -> hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -put ./pg20417.txt hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt
Uploading /diskb/dxb/code/hadoop-2.7.1/test.py -> hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -put /diskb/dxb/code/hadoop-2.7.1/test.py hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py
Uploading /tmp/test.root.20150727.031714.703094/setup-wrapper.sh -> hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -put /tmp/test.root.20150727.031714.703094/setup-wrapper.sh hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh
Uploading /tmp/test.root.20150727.031714.703094/mrjob.tar.gz -> hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -put /tmp/test.root.20150727.031714.703094/mrjob.tar.gz hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz
> /root/.pyenv/versions/2.7.9/bin/python /diskb/dxb/code/hadoop-2.7.1/test.py --steps
running step 1 of 1
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop jar /diskb/dxb/code/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-streaming-2.7.1.jar -files 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py#test.py,hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh#setup-wrapper.sh' -archives 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz#mrjob.tar.gz' -input hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt -output hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/output -mapper 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --mapper' -combiner 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --combiner' -reducer 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --reducer'
HADOOP: packageJobJar: [/tmp/hadoop-unjar4465312523117728891/] [] /tmp/streamjob4994401619229872585.jar tmpDir=null
HADOOP: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP: at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP: at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP: at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP: at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP: at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP: at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP: at java.security.AccessController.doPrivileged(Native Method)
HADOOP: at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP: at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP: at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP:
HADOOP: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP: at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP: at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP: at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP: at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP: at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP: at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP: at java.security.AccessController.doPrivileged(Native Method)
HADOOP: at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP: at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP: at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP:
HADOOP: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP: at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP: at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP: at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP: at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP: at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP: at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP: at java.security.AccessController.doPrivileged(Native Method)
HADOOP: at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP: at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP: at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP:
HADOOP: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP: at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP: at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP: at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP: at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP: at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP: at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP: at java.security.AccessController.doPrivileged(Native Method)
HADOOP: at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP: at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP: at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP:
HADOOP: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP: at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP: at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP: at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP: at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP: at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP: at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP: at java.security.AccessController.doPrivileged(Native Method)
HADOOP: at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP: at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP: at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP:
HADOOP: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP: at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP: at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP: at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP: at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP: at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP: at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP: at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP: at java.security.AccessController.doPrivileged(Native Method)
HADOOP: at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP: at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP: at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP:
HADOOP: Streaming Command Failed!
Job failed with return code 256: ['/diskb/dxb/code/hadoop-2.7.1/bin/hadoop', 'jar', '/diskb/dxb/code/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-streaming-2.7.1.jar', '-files', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py#test.py,hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh#setup-wrapper.sh', '-archives', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz#mrjob.tar.gz', '-input', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt', '-output', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/output', '-mapper', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --mapper', '-combiner', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --combiner', '-reducer', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --reducer']
Scanning logs for probable cause of failure
Traceback (most recent call last):
File "test.py", line 23, in <module>
MRWordFreqCount.run()
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/job.py", line 461, in run
mr_job.execute()
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/job.py", line 479, in execute
super(MRJob, self).execute()
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/launch.py", line 151, in execute
self.run_job()
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/launch.py", line 214, in run_job
runner.run()
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/runner.py", line 464, in run
self._run()
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/hadoop.py", line 237, in _run
self._run_job_in_hadoop()
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/hadoop.py", line 372, in _run_job_in_hadoop
raise CalledProcessError(returncode, step_args)
subprocess.CalledProcessError: Command '['/diskb/dxb/code/hadoop-2.7.1/bin/hadoop', 'jar', '/diskb/dxb/code/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-streaming-2.7.1.jar', '-files', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py#test.py,hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh#setup-wrapper.sh', '-archives', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz#mrjob.tar.gz', '-input', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt', '-output', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/output', '-mapper', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --mapper', '-combiner', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --combiner', '-reducer', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --reducer']' returned non-zero exit status 256
/bin/sh: module: line 1: syntax error: unexpected end of file
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.impl.MetricsSystemImpl).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.