I am accessing the variable in the yarn-site.sh.erb template as follows:
<%# Emit JAVA_HEAP_MAX only when the Puppet scope provides a value. Puppet -%>
<%# variables are exposed to ERB as @-prefixed instance variables, and a -%>
<%# shell assignment must not have spaces around '='. -%>
<% if @yarn_env_java_heap_max -%>
JAVA_HEAP_MAX=<%= @yarn_env_java_heap_max %>
<% end -%>
Here is the role class:
# role/wh/hadoop.pp
# Role classes for Hadoop nodes.
#
# Usage:
#
# To install only hadoop client packages and configs:
# include role::wh::hadoop::client
#
# To install a Hadoop Master (NameNode + ResourceManager, etc.):
# include role::wh::hadoop::master
#
# To install a Hadoop Worker (DataNode + NodeManager + etc.):
# include role::wh::hadoop::worker
#
# Base Hadoop role. Selects the environment-specific configuration class
# from $common::data::env and orders it between a pair of anchors.
# Any other environment value aborts the catalog compilation.
class role::wh::hadoop::client {
  # include common labs or production hadoop configs
  case $common::data::env {
    'dev': {
      include role::wh::hadoop::dev

      anchor { 'cdh5_hadoop_dev_first': }
      -> Class['role::wh::hadoop::dev']
      -> anchor { 'cdh5_hadoop_dev_last': }
    }

    'qa': {
      include role::wh::hadoop::qa

      anchor { 'cdh5_hadoop_qa_first': }
      -> Class['role::wh::hadoop::qa']
      -> anchor { 'cdh5_hadoop_qa_last': }
    }

    'prod': {
      include role::wh::hadoop::production

      anchor { 'cdh5_hadoop_production_first': }
      -> Class['role::wh::hadoop::production']
      -> anchor { 'cdh5_hadoop_production_last': }
    }

    default: {
      fail("Unrecognized environment type for hadoop")
    }
  }
}
# JournalNode role: registers a MOTD entry and declares
# cdh5::hadoop::journalnode, ordered between a pair of anchors.
class role::wh::hadoop::journalnode inherits role::wh::hadoop::client {
  motd::register { 'Hadoop Journal Node': }

  class { 'cdh5::hadoop::journalnode': }

  anchor { 'cdh5_hadoop_journalnode_first': }
  -> Class['cdh5::hadoop::journalnode']
  -> anchor { 'cdh5_hadoop_journalnode_last': }
}
# Master role: registers a MOTD entry, creates the NameNode metadata and
# journal-edits directories, and declares cdh5::hadoop::master ordered
# between a pair of anchors.
class role::wh::hadoop::master inherits role::wh::hadoop::client {
  motd::register { 'Hadoop Master (NameNode, ResourceManager & HistoryServer)': }

  # One system::mkdirs resource per directory, declared via an array title.
  system::mkdirs { [
    '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name',
    '/var/lib/hadoop-hdfs/journalEdits',
  ]: }

  class { 'cdh5::hadoop::master': }

  anchor { 'cdh5_hadoop_master_first': }
  -> Class['cdh5::hadoop::master']
  -> anchor { 'cdh5_hadoop_master_last': }
}
# Worker role: registers a MOTD entry and declares cdh5::hadoop::worker,
# ordered between a pair of anchors.
class role::wh::hadoop::worker inherits role::wh::hadoop::client {
  motd::register { 'Hadoop Worker (DataNode & NodeManager)': }

  class { 'cdh5::hadoop::worker': }

  anchor { 'cdh5_hadoop_worker_first': }
  -> Class['cdh5::hadoop::worker']
  -> anchor { 'cdh5_hadoop_worker_last': }
}
# Standby role: registers a MOTD entry, creates the NameNode metadata and
# journal-edits directories, then declares the namenode and resourcemanager
# classes, applied in that order between a pair of anchors.
class role::wh::hadoop::standby inherits role::wh::hadoop::client {
  motd::register { 'Hadoop Standby NameNode': }

  # One system::mkdirs resource per directory, declared via an array title.
  system::mkdirs { [
    '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name',
    '/var/lib/hadoop-hdfs/journalEdits',
  ]: }

  class { 'cdh5::hadoop::namenode': }
  class { 'cdh5::hadoop::resourcemanager': }

  anchor { 'cdh5_wh_hadoop_standby_first': }
  -> Class['cdh5::hadoop::namenode']
  -> Class['cdh5::hadoop::resourcemanager']
  -> anchor { 'cdh5_wh_hadoop_standby_last': }
}
# Declares cdh5::hadoop::namenode::primarypostinstall, ordered between a
# pair of anchors.
class role::wh::hadoop::primary::postinstall inherits role::wh::hadoop::client {
  class { 'cdh5::hadoop::namenode::primarypostinstall': }

  anchor { 'cdh5_wh_hadoop_primarypostinstall_first': }
  -> Class['cdh5::hadoop::namenode::primarypostinstall']
  -> anchor { 'cdh5_wh_hadoop_primarypostinstall_last': }
}
# Declares cdh5::hadoop::namenode::standbypostinstall, ordered between a
# pair of anchors.
class role::wh::hadoop::standby::postinstall inherits role::wh::hadoop::client {
  class { 'cdh5::hadoop::namenode::standbypostinstall': }

  anchor { 'cdh5_wh_hadoop_standbypostinstall_first': }
  -> Class['cdh5::hadoop::namenode::standbypostinstall']
  -> anchor { 'cdh5_wh_hadoop_standbypostinstall_last': }
}
### The following classes should not be included directly.
### You should either include role::wh::hadoop::client,
### or role::wh::hadoop::worker or
### role::wh::hadoop::master.
# Production Hadoop configuration: declares cdh5::hadoop with the production
# cluster settings and installs the production capacity-scheduler.xml.
class role::wh::hadoop::production {
  # ResourceManager hosts. Held in a local variable so the same list can be
  # passed to cdh5::hadoop and tested against this node's FQDN below; an
  # attribute name such as rm_hosts is not readable as $rm_hosts here.
  $rm_hosts = [
    'us3sm2nn010r07.comp.prod.local',
    'us3sm2nn011r08.comp.prod.local',
  ]

  # Conditionals are statements and cannot appear inside a resource attribute
  # list, so the value is computed up front. The list holds fully-qualified
  # names, hence the comparison against $::fqdn rather than $::hostname.
  # Passing undef leaves the parameter at the default declared by
  # cdh5::hadoop.
  if $::fqdn in $rm_hosts {
    $yarn_env_java_heap_max = '-Xmx4000M'
  } else {
    $yarn_env_java_heap_max = undef
  }

  class { 'cdh5::hadoop':
    namenode_hosts                           => [
      'us3sm2nn010r07.comp.prod.local',
      'us3sm2nn011r08.comp.prod.local',
    ],
    rm_hosts                                 => $rm_hosts,
    dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name', '/nfs/namedir']],
    config_directory                         => '/etc/hadoop/conf',
    nameservice_id                           => 'whprod', # This is the logical name of the Hadoop cluster.
    journalnode_hosts                        => [
      'us3sm2zk010r07.comp.prod.local',
      'us3sm2zk011r08.comp.prod.local',
      'us3sm2zk012r09.comp.prod.local',
    ],
    dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
    datanode_mounts                          => [
      '/data1',
      '/data2',
      '/data3',
      '/data4',
      '/data5',
      '/data6',
      '/data7',
      '/data8',
      '/data9',
      '/data10',
      '/data11'
    ],
    dfs_data_path                            => 'dfs',
    dfs_block_size                           => 268435456, # 256 MB
    # Turn on Snappy compression by default for maps and final outputs
    mapreduce_intermediate_compression       => true,
    mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression             => true,
    mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression_type        => 'BLOCK',
    #mapreduce_map_tasks_maximum => ($::processorcount - 2) / 2,
    #mapreduce_reduce_tasks_maximum => ($::processorcount - 2) / 2,
    #mapreduce_job_reuse_jvm_num_tasks => 1,
    #mapreduce_map_memory_mb => 1536,
    #mapreduce_reduce_memory_mb => 3072,
    #mapreduce_map_java_opts => '-Xmx1024M',
    #mapreduce_reduce_java_opts => '-Xmx2560M',
    #mapreduce_reduce_shuffle_parallelcopies => 10,
    #mapreduce_task_io_sort_mb => 200,
    #mapreduce_task_io_sort_factor => 10,
    yarn_env_java_heap_max                   => $yarn_env_java_heap_max,
    yarn_nodemanager_resource_memory_mb      => 40960,
    yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
    net_topology_script_template             => 'hadoop/getRackID.py-prod',
  }

  anchor { 'cdh5_hadoop_first': }
  -> Class['cdh5::hadoop']
  -> anchor { 'cdh5_hadoop_last': }

  file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
    content => template('hadoop/capacity-scheduler.xml-prod'),
    require => Class['cdh5::hadoop'],
  }
}
# QA Hadoop configuration: declares cdh5::hadoop with the QA cluster
# settings and installs the QA capacity-scheduler.xml.
class role::wh::hadoop::qa {
  class { 'cdh5::hadoop':
    namenode_hosts                                  => [
      'us3sm2hbqa03r09.comp.prod.local',
      'us3sm2hbqa04r07.comp.prod.local',
    ],
    rm_hosts                                        => [
      'us3sm2hbqa03r09.comp.prod.local',
      'us3sm2hbqa04r07.comp.prod.local',
    ],
    dfs_name_dir                                    => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
    config_directory                                => '/etc/hadoop/conf',
    nameservice_id                                  => 'whqa', # This is the logical name of the Hadoop cluster.
    journalnode_hosts                               => [
      'us3sm2hbqa03r09.comp.prod.local',
      'us3sm2hbqa04r07.comp.prod.local',
      'us3sm2hbqa05r08.comp.prod.local',
    ],
    dfs_journalnode_edits_dir                       => '/var/lib/hadoop-hdfs/journalEdits',
    datanode_mounts                                 => [
      '/data1',
      '/data2'
    ],
    dfs_data_path                                   => 'dfs',
    dfs_block_size                                  => 268435456, # 256 MB
    # Turn on Snappy compression by default for maps and final outputs
    mapreduce_intermediate_compression              => true,
    mapreduce_intermediate_compression_codec        => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression                    => true,
    mapreduce_output_compression_codec              => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression_type               => 'BLOCK',
    yarn_nodemanager_resource_memory_mb             => 24576,
    yarn_resourcemanager_max_completed_applications => 500,
    yarn_resourcemanager_scheduler_class            => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
    net_topology_script_template                    => 'hadoop/getRackID.py-qa',
  }

  anchor { 'cdh5_wh_hadoop_qa_first': }
  -> Class['cdh5::hadoop']
  -> anchor { 'cdh5_wh_hadoop_qa_last': }

  # The scheduler configuration is written only after cdh5::hadoop is applied.
  file { "$::cdh5::hadoop::config_directory/capacity-scheduler.xml":
    content => template('hadoop/capacity-scheduler.xml-qa'),
    require => Class['cdh5::hadoop'],
  }
}
# Dev Hadoop configuration: a single-node layout in which this node's FQDN
# is used as the NameNode, ResourceManager and JournalNode host.
class role::wh::hadoop::dev {
  class { 'cdh5::hadoop':
    namenode_hosts                           => [$::fqdn],
    rm_hosts                                 => [$::fqdn],
    dfs_name_dir                             => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
    config_directory                         => '/etc/hadoop/conf',
    # nameservice_id => 'whdev',
    journalnode_hosts                        => [$::fqdn],
    dfs_journalnode_edits_dir                => '/var/lib/hadoop-hdfs/journalEdits',
    datanode_mounts                          => [
      '/data1',
      '/data2'
    ],
    dfs_data_path                            => 'dfs',
    dfs_block_size                           => 67108864, # 64 MB (64 * 1024 * 1024)
    # Turn on Snappy compression by default for maps and final outputs
    mapreduce_intermediate_compression       => true,
    mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression             => true,
    mapreduce_output_compression_codec       => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression_type        => 'BLOCK',
    mapreduce_map_tasks_maximum              => 2,
    mapreduce_reduce_tasks_maximum           => 2,
    yarn_nodemanager_resource_memory_mb      => 4096,
    yarn_resourcemanager_scheduler_class     => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
  }

  anchor { 'cdh5_hadoop_first': }
  -> Class['cdh5::hadoop']
  -> anchor { 'cdh5_hadoop_last': }

  # NOTE(review): dev renders the "-qa" capacity-scheduler template; confirm
  # this is intentional rather than a missing dev-specific template.
  file { "$::cdh5::hadoop::config_directory/capacity-scheduler.xml":
    content => template('hadoop/capacity-scheduler.xml-qa'),
    require => Class['cdh5::hadoop'],
  }
}
Note that the main hadoop class has a yarn_env_java_heap_max parameter that I added, and that parameter is also given a default value.