SGE command file for Hadoop jobs in clusters using the SGE scheduler

Prakashan Korambath

Jul 27, 2010, 9:06:56 PM
to VSCSE Big Data for Science 2010
This is a Python script I posted on the UC Grid mailing list back in
January 2010 for running Hadoop on a cluster that uses the Sun Grid
Engine scheduler. If there is any real interest in this group, I can
probably go back and test it with the latest Hadoop. It worked fine
at the time.

Prakashan



The SGE command file is below. It was tested on 4 nodes with 4 slots
each; the parallel environment is configured to give four complete
nodes to Hadoop.
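
For reference, a parallel environment that hands out whole nodes can
be defined with a fixed allocation rule equal to the slots per node.
A minimal sketch in qconf -sp style follows; the values are
assumptions for 4-slot nodes, not the exact configuration used here:

pe_name            hadooppe
slots              999
allocation_rule    4
control_slaves     FALSE
job_is_first_task  TRUE

With allocation_rule 4, requesting -pe hadooppe 16 lands the job on
exactly four complete nodes.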

#!/bin/sh
# hdfsprolog.py.cmd
#
#$ -o out.$JOB_ID
#$ -cwd
#$ -j y
#$ -pe hadooppe 16

export HADOOP_HOME=/u/home/ppk/hadoop-0.20.1
cd /u/home/ppk/hadoop-0.20.1
/u/home/ppk/bin/python/hdfsprolog.py $PE_HOSTFILE $TMPDIR 4
sleep 10
/u/home/ppk/hadoop-0.20.1/bin/hadoop namenode -format
sleep 60
/u/home/ppk/hadoop-0.20.1/bin/start-dfs.sh
sleep 20
/u/home/ppk/hadoop-0.20.1/bin/start-mapred.sh
sleep 20
/u/home/ppk/hadoop-0.20.1/bin/hadoop fs -put /u/home/ppk/hadoop-0.20.1/conf input
/u/home/ppk/hadoop-0.20.1/bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
/u/home/ppk/hadoop-0.20.1/bin/hadoop fs -get output /u/home/ppk/hadoop-0.20.1/output
/u/home/ppk/hadoop-0.20.1/bin/stop-mapred.sh
/u/home/ppk/hadoop-0.20.1/bin/stop-dfs.sh

exit
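
Assuming the command file is saved as hdfsprolog.py.cmd (per the
comment at the top), it is submitted in the usual way:

qsub hdfsprolog.py.cmd

SGE then provides $JOB_ID, $TMPDIR and $PE_HOSTFILE to the job, which
is what the command file above and the prolog script below rely on.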


The Python Script is here:
/u/home/ppk/bin/python/hdfsprolog.py
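
The script reads the SGE $PE_HOSTFILE, whose lines typically have the
form "hostname slots queue processor-range"; only the first column is
used. A hypothetical 4-node allocation might look like this (hostnames
made up):

n001 4 all.q@n001 UNDEFINED
n002 4 all.q@n002 UNDEFINED
n003 4 all.q@n003 UNDEFINED
n004 4 all.q@n004 UNDEFINED

The first host becomes the Hadoop master, and every host, including
the master, is written to conf/slaves.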


#!/usr/bin/python
# * Copyright 2010. The Regents of the University of California.
# * All Rights Reserved. Use is subject to license terms.
# * @author Prakashan Korambath (Jan, 2010)
# * SGE prolog script for Hadoop

import os, sys, re

def getmaster(file, masters, slaves):
    try:
        input_file = open(file, 'r')
    except IOError:
        print "Could not open " + file
    try:
        masters_file = open(masters, 'w')
    except IOError:
        print "Could not open " + masters
    try:
        slaves_file = open(slaves, 'w')
    except IOError:
        print "Could not open " + slaves

    # The first host listed in $PE_HOSTFILE becomes the Hadoop master;
    # it is also written to the slaves file so it serves as a worker too.
    line = input_file.readline()
    master = line.split()
    master = master[0]
    masters_file.write(master + '\n')
    slaves_file.write(master + '\n')
    # Every remaining host goes into the slaves file.
    while line:
        line = input_file.readline()
        slave = line.split()
        if line != "":
            slave = slave[0]
            slaves_file.write(slave + '\n')
    input_file.close()
    masters_file.close()
    slaves_file.close()
    return master


def fileheader(file):
    file.write("<?xml version=\"1.0\"?>" + '\n')
    file.write("<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>" + '\n')
    file.write("<configuration>" + '\n')

def filetail(file):
    file.write("</configuration>" + '\n')

def createhdfssite(nodes, file):
    # Write conf/hdfs-site.xml with dfs.replication set to the node count.
    try:
        output_file = open(file, 'w')
    except IOError:
        print "Could not open " + file
    fileheader(output_file)
    output_file.write("<property>" + '\n')
    output_file.write(" <name>dfs.replication</name>" + '\n')
    output_file.write(" <value>" + nodes + "</value>" + '\n')
    output_file.write("</property>" + '\n')
    filetail(output_file)

    output_file.close()

def createmapredsite(master, file):
    # Write conf/mapred-site.xml pointing the jobtracker at the master node.
    try:
        output_file = open(file, 'w')
    except IOError:
        print "Could not open " + file
    fileheader(output_file)
    output_file.write("<property>" + '\n')
    output_file.write(" <name>mapred.job.tracker</name>" + '\n')
    output_file.write(" <value>" + master + ":9001</value>" + '\n')
    output_file.write("</property>" + '\n')
    filetail(output_file)

    output_file.close()

def createcoresite(master, tmpdir, file):
    # Write conf/core-site.xml: job-local scratch space and the HDFS namenode URI.
    try:
        output_file = open(file, 'w')
    except IOError:
        print "Could not open " + file

    fileheader(output_file)

    output_file.write("<property>" + '\n')
    output_file.write(" <name>hadoop.tmp.dir</name>" + '\n')
    output_file.write(" <value>" + tmpdir + "/hadoop-${user.name}" + "</value>" + '\n')
    output_file.write("</property>" + '\n')

    output_file.write("<property>" + '\n')
    output_file.write(" <name>fs.default.name</name>" + '\n')
    output_file.write(" <value>hdfs://" + master + ":9000</value>" + '\n')
    output_file.write("</property>" + '\n')
    filetail(output_file)
    output_file.close()

def main():

    try:
        if (len(sys.argv) != 4):
            print "Usage: " + sys.argv[0] + " pehostfile tmpdir nodes"
            sys.exit(1)
        else:
            # Generate the Hadoop configuration under $HADOOP_HOME/conf
            # from the hosts SGE assigned to this job.
            HADOOP_HOME = os.environ.get("HADOOP_HOME")
            pehostfile = sys.argv[1]
            tmpdir = sys.argv[2]
            nodes = sys.argv[3]
            coresitefile = HADOOP_HOME + "/conf/core-site.xml"
            mapredfile = HADOOP_HOME + "/conf/mapred-site.xml"
            hdfssitefile = HADOOP_HOME + "/conf/hdfs-site.xml"
            mastersfile = HADOOP_HOME + "/conf/masters"
            slavesfile = HADOOP_HOME + "/conf/slaves"
            master = getmaster(pehostfile, mastersfile, slavesfile)
            createcoresite(master, tmpdir, coresitefile)
            createmapredsite(master, mapredfile)
            createhdfssite(nodes, hdfssitefile)
    except:
        print "Error in execution"



if __name__ == "__main__":
    main()
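
For a hypothetical master host n001 and scratch directory
/scratch/12345.1.q, createcoresite would produce a conf/core-site.xml
along these lines:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
 <name>hadoop.tmp.dir</name>
 <value>/scratch/12345.1.q/hadoop-${user.name}</value>
</property>
<property>
 <name>fs.default.name</name>
 <value>hdfs://n001:9000</value>
</property>
</configuration>

mapred-site.xml and hdfs-site.xml are analogous: the former points
mapred.job.tracker at n001:9001 and the latter sets dfs.replication to
the node count given on the command line.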