cd ./spark-1.5.1-bin-hadoop2.4/ec2
./spark-ec2 --key-pair=<my_key_pair> --identity-file=/my/key/pair/file.pem --instance-type=r3.large -s 1 --spot-price=0.25 launch spark_rstudio_h2o_cluster
unzip sparkling-water-1.5.12.zip
export SPARK_HOME="/root/spark/"
export HADOOP_CONF_DIR=/etc/hadoop/conf
cd ./sparkling-water-1.5.12
bin/pysparkling --num-executors 3 --executor-memory 20g --executor-cores 10 --driver-memory 20g
from pysparkling import *
>>> from pysparkling import *
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/mnt/spark/spark-2efad8eb-5ee2-4878-b3b8-6e85aeed9390/userFiles-3489d960-50f7-4175-94cf-b594b15a0943/pySparkling-1.5.12-py2.7.egg/pysparkling/__init__.py", line 11, in <module>
File "/mnt/spark/spark-2efad8eb-5ee2-4878-b3b8-6e85aeed9390/userFiles-3489d960-50f7-4175-94cf-b594b15a0943/pySparkling-1.5.12-py2.7.egg/pysparkling/dataframe.py", line 1, in <module>
File "/mnt/spark/spark-2efad8eb-5ee2-4878-b3b8-6e85aeed9390/userFiles-3489d960-50f7-4175-94cf-b594b15a0943/pySparkling-1.5.12-py2.7.egg/h2o/__init__.py", line 7, in <module>
File "/mnt/spark/spark-2efad8eb-5ee2-4878-b3b8-6e85aeed9390/userFiles-3489d960-50f7-4175-94cf-b594b15a0943/pySparkling-1.5.12-py2.7.egg/h2o/h2o.py", line 8, in <module>
ImportError: No module named future.standard_library
>>>
hc= H2OContext(sc).start()
import h2o
bin/sparkling-shell --num-executors 3 --executor-memory 20g --executor-cores 10 --driver-memory 20g
-----
Spark master (MASTER) : local[*]
Spark home (SPARK_HOME) : /root/spark/
H2O build version : 3.8.1.3 (turan)
Spark build version : 1.5.2
----
16/04/08 17:04:04 INFO spark.SecurityManager: Changing view acls to: root
16/04/08 17:04:04 INFO spark.SecurityManager: Changing modify acls to: root
16/04/08 17:04:04 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
16/04/08 17:04:04 INFO spark.HttpServer: Starting HTTP Server
16/04/08 17:04:04 INFO server.Server: jetty-8.y.z-SNAPSHOT
16/04/08 17:04:04 INFO util.Utils: Successfully started service 'HTTP class server' on port 35962.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 1.5.1
      /_/
Using Scala version 2.10.4 (OpenJDK 64-Bit Server VM, Java 1.7.0_99)
Type in expressions to have them evaluated.
Type :help for more information.
16/04/08 17:04:10 INFO spark.SparkContext: Running Spark version 1.5.1
16/04/08 17:04:10 WARN spark.SparkConf:
SPARK_WORKER_INSTANCES was detected (set to '1').
This is deprecated in Spark 1.0+.
Please instead use:
- ./spark-submit with --num-executors to specify the number of executors
- Or set SPARK_EXECUTOR_INSTANCES
- spark.executor.instances to configure the number of instances in the spark config.
16/04/08 17:04:10 INFO spark.SecurityManager: Changing view acls to: root
16/04/08 17:04:10 INFO spark.SecurityManager: Changing modify acls to: root
16/04/08 17:04:10 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
16/04/08 17:04:10 INFO slf4j.Slf4jLogger: Slf4jLogger started
16/04/08 17:04:10 INFO Remoting: Starting remoting
16/04/08 17:04:10 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@98.76.54.321:58594]
16/04/08 17:04:10 INFO util.Utils: Successfully started service 'sparkDriver' on port 58594.
16/04/08 17:04:10 INFO spark.SparkEnv: Registering MapOutputTracker
16/04/08 17:04:10 INFO spark.SparkEnv: Registering BlockManagerMaster
16/04/08 17:04:10 INFO storage.DiskBlockManager: Created local directory at /mnt/spark/blockmgr-e92776ff-dd11-40fa-a712-07560a852bb4
16/04/08 17:04:10 INFO storage.MemoryStore: MemoryStore started with capacity 10.4 GB
16/04/08 17:04:10 INFO spark.HttpFileServer: HTTP File server directory is /mnt/spark/spark-ff5a4dac-b059-4c3c-9a50-cfa76a2002e0/httpd-1a3b19a7-cc3d-4238-9a50-b409e2be1d83
16/04/08 17:04:10 INFO spark.HttpServer: Starting HTTP Server
16/04/08 17:04:10 INFO server.Server: jetty-8.y.z-SNAPSHOT
16/04/08 17:04:10 INFO util.Utils: Successfully started service 'HTTP file server' on port 44748.
16/04/08 17:04:10 INFO spark.SparkEnv: Registering OutputCommitCoordinator
16/04/08 17:04:11 INFO server.Server: jetty-8.y.z-SNAPSHOT
16/04/08 17:04:11 INFO util.Utils: Successfully started service 'SparkUI' on port 4040.
16/04/08 17:04:11 WARN metrics.MetricsSystem: Using default name DAGScheduler for source because spark.app.id is not set.
16/04/08 17:04:11 INFO client.AppClient$ClientEndpoint: Connecting to master spark://ec2-12-34-567-89.compute-1.amazonaws.com:7077...
16/04/08 17:04:11 INFO cluster.SparkDeploySchedulerBackend: Connected to Spark cluster with app ID app-20160408170411-0010
16/04/08 17:04:11 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 38479.
16/04/08 17:04:11 INFO netty.NettyBlockTransferService: Server created on 38479
16/04/08 17:04:11 INFO storage.BlockManagerMaster: Trying to register BlockManager
16/04/08 17:04:11 INFO storage.BlockManagerMasterEndpoint: Registering block manager 98.76.54.321:38479 with 10.4 GB RAM, BlockManagerId(driver, 98.76.54.321, 38479)
16/04/08 17:04:11 INFO storage.BlockManagerMaster: Registered BlockManager
16/04/08 17:04:11 INFO cluster.SparkDeploySchedulerBackend: SchedulerBackend is ready for scheduling beginning after reached minRegisteredResourcesRatio: 0.0
16/04/08 17:04:11 INFO repl.SparkILoop: Created spark context..
Spark context available as sc.
16/04/08 17:04:12 INFO hive.HiveContext: Initializing execution hive, version 1.2.1
16/04/08 17:04:12 INFO client.ClientWrapper: Inspected Hadoop version: 1.2.1
16/04/08 17:04:12 INFO client.ClientWrapper: Loading Hadoop shims org.apache.hadoop.hive.shims.Hadoop20SShims
16/04/08 17:04:12 INFO client.ClientWrapper: Loaded org.apache.hadoop.hive.shims.Hadoop20SShims for Hadoop version 1.2.1
16/04/08 17:04:13 INFO metastore.HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore
16/04/08 17:04:13 INFO metastore.ObjectStore: ObjectStore, initialize called
16/04/08 17:04:13 INFO DataNucleus.Persistence: Property datanucleus.cache.level2 unknown - will be ignored
16/04/08 17:04:13 INFO DataNucleus.Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored
16/04/08 17:04:16 INFO metastore.ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order"
16/04/08 17:04:17 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table.
16/04/08 17:04:17 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table.
16/04/08 17:04:20 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table.
16/04/08 17:04:20 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table.
16/04/08 17:04:20 INFO metastore.MetaStoreDirectSql: Using direct SQL, underlying DB is DERBY
16/04/08 17:04:20 INFO metastore.ObjectStore: Initialized ObjectStore
16/04/08 17:04:20 WARN metastore.ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
16/04/08 17:04:20 WARN metastore.ObjectStore: Failed to get database default, returning NoSuchObjectException
16/04/08 17:04:21 INFO metastore.HiveMetaStore: Added admin role in metastore
16/04/08 17:04:21 INFO metastore.HiveMetaStore: Added public role in metastore
16/04/08 17:04:21 INFO metastore.HiveMetaStore: No user is added in admin role, since config is empty
16/04/08 17:04:21 INFO metastore.HiveMetaStore: 0: get_all_databases
16/04/08 17:04:21 INFO HiveMetaStore.audit: ugi=root ip=unknown-ip-addr cmd=get_all_databases
16/04/08 17:04:21 INFO metastore.HiveMetaStore: 0: get_functions: db=default pat=*
16/04/08 17:04:21 INFO HiveMetaStore.audit: ugi=root ip=unknown-ip-addr cmd=get_functions: db=default pat=*
16/04/08 17:04:21 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MResourceUri" is tagged as "embedded-only" so does not have its own datastore table.
java.lang.RuntimeException: java.lang.RuntimeException: The root scratch dir: /tmp/hive on HDFS should be writable. Current permissions are: rwx--x--x
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:171)
at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:162)
at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:160)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:167)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at org.apache.spark.repl.SparkILoop.createSQLContext(SparkILoop.scala:1028)
at $iwC$$iwC.<init>(<console>:9)
at $iwC.<init>(<console>:18)
at <init>(<console>:20)
at .<init>(<console>:24)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:132)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.RuntimeException: The root scratch dir: /tmp/hive on HDFS should be writable. Current permissions are: rwx--x--x
at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:612)
at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
... 56 more