set up mrjob with hadoop failed with the error "returned non-zero exit status 256

482 views
Skip to first unread message

8514...@qq.com

unread,
Jul 27, 2015, 11:35:47 AM7/27/15
to mrjob

       Hi, I am new to mrjob and Hadoop. After building my Hadoop cluster, I tried to use mrjob to submit a job to Hadoop,
but unfortunately it failed with the error "returned non-zero exit status 256". More details follow:

My environment:


hadoop:2.7.1
python:2.7.9
  1. This is my example:

#! /usr/bin/python

from mrjob.job import MRJob
import re

WORD_RE = re.compile(r"[\w']+")  # matches runs of word characters and apostrophes (e.g. "don't")


class MRWordFreqCount(MRJob):
    """MapReduce job that counts how often each word occurs in the input.

    Words are matched by WORD_RE and lowercased before counting, so the
    count is case-insensitive.
    """

    def mapper(self, _, line):
        # Emit a (word, 1) pair for every word-like token on the line.
        for token in WORD_RE.findall(line):
            yield token.lower(), 1

    def combiner(self, word, counts):
        # Pre-aggregate on the map side to cut down shuffle traffic.
        yield word, sum(counts)

    def reducer(self, word, counts):
        # Produce the final total for each word.
        yield word, sum(counts)


if __name__ == '__main__':
    # Entry point: mrjob re-invokes this script with --mapper/--combiner/
    # --reducer flags on the cluster, so run() must be guarded like this.
    MRWordFreqCount.run()

  2. And I run it with this command:
python test.py -r hadoop -v ./pg20417.txt  > output

  3. My config is:
[root@master hadoop-2.7.1]# cat  /etc/mrjob.conf 
{
   
"runners":{
       
"hadoop":{
           
"interpreter": "/root/.pyenv/versions/2.7.9/bin/python",
           
"hadoop_bin": "/diskb/dxb/code/hadoop-2.7.1/bin/hadoop"
         
}
     
}
}

    4. The result is:
[root@master hadoop-2.7.1]# python test.py -r hadoop -v ./pg20417.txt  > output
looking
for configs in /root/.mrjob.conf
looking
for configs in /etc/mrjob.conf
using configs in /etc/mrjob.conf
Active configuration:
{'base_tmp_dir': '/tmp',
 
'bootstrap_mrjob': True,
 
'check_input_paths': True,
 
'cleanup': ['ALL'],
 
'cleanup_on_failure': ['NONE'],
 
'cmdenv': {},
 
'hadoop_bin': ['/diskb/dxb/code/hadoop-2.7.1/bin/hadoop'],
 
'hadoop_extra_args': [],
 
'hadoop_home': '/diskb/dxb/code/hadoop-2.7.1',
 
'hadoop_streaming_jar': None,
 
'hadoop_version': '0.20',
 
'hdfs_scratch_dir': 'tmp/mrjob',
 
'interpreter': ['/root/.pyenv/versions/2.7.9/bin/python'],
 
'job_name': None,
 
'jobconf': {},
 
'label': None,
 
'owner': 'root',
 
'python_archives': [],
 
'python_bin': ['python'],
 
'setup': [],
 
'setup_cmds': [],
 
'setup_scripts': [],
 
'sh_bin': ['sh', '-ex'],
 
'steps_interpreter': ['/root/.pyenv/versions/2.7.9/bin/python'],
 
'steps_python_bin': ['/root/.pyenv/versions/2.7.9/bin/python'],
 
'strict_protocols': None,
 
'upload_archives': [],
 
'upload_files': []}
Looking for hadoop streaming jar in /diskb/dxb/code/hadoop-2.7.1
Hadoop streaming jar is /diskb/dxb/code/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-streaming-2.7.1.jar
creating tmp directory
/tmp/test.root.20150727.031714.703094
archiving
/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob -> /tmp/test.root.20150727.031714.703094/mrjob.tar.gz as mrjob/
writing wrapper script to
/tmp/test.root.20150727.031714.703094/setup-wrapper.sh
WRAPPER
: # store $PWD
WRAPPER
: __mrjob_PWD=$PWD
WRAPPER
:
WRAPPER
: # obtain exclusive file lock
WRAPPER
: exec 9>/tmp/wrapper.lock.test.root.20150727.031714.703094
WRAPPER
: python -c 'import fcntl; fcntl.flock(9, fcntl.LOCK_EX)'
WRAPPER
:
WRAPPER
: # setup commands
WRAPPER
: {
WRAPPER
:   export PYTHONPATH=$__mrjob_PWD/mrjob.tar.gz:$PYTHONPATH
WRAPPER
: } 0</dev/null 1>&2
WRAPPER
:
WRAPPER
: # release exclusive file lock
WRAPPER
: exec 9>&-
WRAPPER
:
WRAPPER
: # run task from the original working directory
WRAPPER
: cd $__mrjob_PWD
WRAPPER
: "$@"
Making directory hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/ on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop version
Using Hadoop version 2.7.1
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -mkdir -p hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/
Copying local files into hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/
Uploading ./pg20417.txt -> hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -put ./pg20417.txt hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt
Uploading /diskb/dxb/code/hadoop-2.7.1/test.py -> hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -put /diskb/dxb/code/hadoop-2.7.1/test.py hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py
Uploading /tmp/test.root.20150727.031714.703094/setup-wrapper.sh -> hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -put /tmp/test.root.20150727.031714.703094/setup-wrapper.sh hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh
Uploading /tmp/test.root.20150727.031714.703094/mrjob.tar.gz -> hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz on HDFS
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop fs -put /tmp/test.root.20150727.031714.703094/mrjob.tar.gz hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz
> /root/.pyenv/versions/2.7.9/bin/python /diskb/dxb/code/hadoop-2.7.1/test.py --steps
running step
1 of 1
> /diskb/dxb/code/hadoop-2.7.1/bin/hadoop jar /diskb/dxb/code/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-streaming-2.7.1.jar -files 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py#test.py,hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh#setup-wrapper.sh' -archives 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz#mrjob.tar.gz' -input hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt -output hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/output -mapper 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --mapper' -combiner 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --combiner' -reducer 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --reducer'
HADOOP
: packageJobJar: [/tmp/hadoop-unjar4465312523117728891/] [] /tmp/streamjob4994401619229872585.jar tmpDir=null
HADOOP
: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP
:     at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP
:     at java.security.AccessController.doPrivileged(Native Method)
HADOOP
:     at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP
:     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP
:
HADOOP
: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP
:     at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP
:     at java.security.AccessController.doPrivileged(Native Method)
HADOOP
:     at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP
:     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP
:
HADOOP
: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP
:     at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP
:     at java.security.AccessController.doPrivileged(Native Method)
HADOOP
:     at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP
:     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP
:
HADOOP
: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP
:     at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP
:     at java.security.AccessController.doPrivileged(Native Method)
HADOOP
:     at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP
:     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP
:
HADOOP
: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP
:     at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP
:     at java.security.AccessController.doPrivileged(Native Method)
HADOOP
:     at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP
:     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP
:
HADOOP
: Error: java.lang.RuntimeException: PipeMapRed.waitOutputThreads(): subprocess failed with code 2
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads(PipeMapRed.java:322)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished(PipeMapRed.java:535)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapper.close(PipeMapper.java:130)
HADOOP
:     at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:61)
HADOOP
:     at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
HADOOP
:     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
HADOOP
:     at java.security.AccessController.doPrivileged(Native Method)
HADOOP
:     at javax.security.auth.Subject.doAs(Subject.java:415)
HADOOP
:     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
HADOOP
:     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
HADOOP
:
HADOOP
: Streaming Command Failed!
Job failed with return code 256: ['/diskb/dxb/code/hadoop-2.7.1/bin/hadoop', 'jar', '/diskb/dxb/code/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-streaming-2.7.1.jar', '-files', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py#test.py,hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh#setup-wrapper.sh', '-archives', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz#mrjob.tar.gz', '-input', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt', '-output', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/output', '-mapper', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --mapper', '-combiner', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --combiner', '-reducer', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --reducer']
Scanning logs for probable cause of failure
Traceback (most recent call last):
 
File "test.py", line 23, in <module>
   
MRWordFreqCount.run()
 
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/job.py", line 461, in run
    mr_job
.execute()
 
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/job.py", line 479, in execute
   
super(MRJob, self).execute()
 
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/launch.py", line 151, in execute
   
self.run_job()
 
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/launch.py", line 214, in run_job
    runner
.run()
 
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/runner.py", line 464, in run
   
self._run()
 
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/hadoop.py", line 237, in _run
   
self._run_job_in_hadoop()
 
File "/root/.pyenv/versions/2.7.9/lib/python2.7/site-packages/mrjob/hadoop.py", line 372, in _run_job_in_hadoop
   
raise CalledProcessError(returncode, step_args)
subprocess
.CalledProcessError: Command '['/diskb/dxb/code/hadoop-2.7.1/bin/hadoop', 'jar', '/diskb/dxb/code/hadoop-2.7.1/share/hadoop/tools/lib/hadoop-streaming-2.7.1.jar', '-files', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/test.py#test.py,hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/setup-wrapper.sh#setup-wrapper.sh', '-archives', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/mrjob.tar.gz#mrjob.tar.gz', '-input', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/files/pg20417.txt', '-output', 'hdfs:///user/root/tmp/mrjob/test.root.20150727.031714.703094/output', '-mapper', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --mapper', '-combiner', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --combiner', '-reducer', 'sh -ex setup-wrapper.sh /root/.pyenv/versions/2.7.9/bin/python test.py --step-num=0 --reducer']' returned non-zero exit status 256
How can I fix it?
PS:
in my stderr, it shows:
/bin/sh: module: line 1: syntax error: unexpected end of file
log4j
:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.impl.MetricsSystemImpl).
log4j
:WARN Please initialize the log4j system properly.
log4j
:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.








Reply all
Reply to author
Forward
0 new messages