import java.util.Properties

import org.apache.spark.sql.SparkSession

val spark = SparkSession
  .builder()
  .appName("Spark Cloud-SQL basic example")
  .getOrCreate()

val connectionProperties = new Properties()
connectionProperties.setProperty("user", "root")
connectionProperties.setProperty("password", "#####")

val databaseName = "test_database"
val instanceConnectionName = "instance_name"
val jdbcUrl = String.format(
  "jdbc:mysql://google/%s?cloudSqlInstance=%s&socketFactory=com.google.cloud.sql.mysql.SocketFactory",
  databaseName, instanceConnectionName)

val mktPersonDf = spark.read.jdbc(jdbcUrl, "test_table", connectionProperties)
mktPersonDf.count()
Exception in thread "main" com.mysql.jdbc.exceptions.jdbc4.MySQLNonTransientConnectionException: Could not create socket factory 'com.google.cloud.sql.mysql.SocketFactory' due to underlying exception:
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:418)
at com.mysql.jdbc.Util.getInstance(Util.java:401)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:918)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:897)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:886)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:860)
at com.mysql.jdbc.MysqlIO.createSocketFactory(MysqlIO.java:3330)
at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:296)
at com.mysql.jdbc.ConnectionImpl.coreConnect(ConnectionImpl.java:2192)
at com.mysql.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:2225)
at com.mysql.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:2024)
at com.mysql.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:779)
at com.mysql.jdbc.JDBC4Connection.<init>(JDBC4Connection.java:47)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:418)
at com.mysql.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:389)
at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:330)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:61)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:52)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:58)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation.<init>(JDBCRelation.scala:114)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:52)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:307)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:178)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:146)
at org.apache.spark.sql.DataFrameReader.jdbc(DataFrameReader.scala:193)
at com.marketo.analytics.App$.main(App.scala:27)
at com.marketo.analytics.App.main(App.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:775)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: com.google.cloud.sql.mysql.SocketFactory
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:338)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:264)
at com.mysql.jdbc.MysqlIO.createSocketFactory(MysqlIO.java:3328)
... 33 more
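The trailing Caused by is the real problem: java.lang.ClassNotFoundException for com.google.cloud.sql.mysql.SocketFactory means the MySQL driver cannot find the socket factory class named in the JDBC URL anywhere on the runtime classpath. The Cloud SQL socket factory artifact was added to the pom.xml to provide it: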
<dependency>
  <groupId>com.google.cloud.sql</groupId>
  <artifactId>mysql-socket-factory</artifactId>
  <version>1.0.5</version>
</dependency>
readonly MYSQL_ROOT_PASSWORD='######'

/etc/google-dataproc/startup-scripts/dataproc-initialization-script-0: 20: set: Illegal option -o pipefail
# script line 20:
set -euxo pipefail

/etc/google-dataproc/startup-scripts/dataproc-initialization-script-0: 45: /etc/google-dataproc/startup-scripts/dataproc-initialization-script-0: Syntax error: "(" unexpected
# script line 45:
echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@" >&2
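Both messages are characteristic of the startup script being interpreted by a plain POSIX shell (dash) rather than bash: set -o pipefail is a bash-only option, and the "(" syntax error at line 45 likewise suggests a construct dash does not understand. If you hit this with a custom initialization action, making sure the script begins with a #!/bin/bash shebang, or invoking it explicitly via bash, should resolve it.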
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-shade-plugin</artifactId>
  <version>3.0.0</version>
  <executions>
    <execution>
      <phase>package</phase>
      <goals>
        <goal>shade</goal>
      </goals>
      <configuration>
        <artifactSet>
          <excludes>
            <exclude>junit:junit</exclude>
            <exclude>org.scalatest:scalatest_2.10</exclude>
          </excludes>
        </artifactSet>
        <filters>
          <filter>
            <artifact>*:*</artifact>
            <excludes>
              <exclude>META-INF/*.SF</exclude>
              <exclude>META-INF/*.DSA</exclude>
              <exclude>META-INF/*.RSA</exclude>
            </excludes>
          </filter>
        </filters>
      </configuration>
    </execution>
  </executions>
</plugin>
[INFO] Including com.google.cloud.sql:mysql-socket-factory:jar:1.0.5 in the shaded jar.
[INFO] Including mysql:mysql-connector-java:jar:5.1.38 in the shaded jar.
[INFO] Including com.google.cloud.sql:jdbc-socket-factory-core:jar:1.0.5 in the shaded jar.
[INFO] Including com.google.apis:google-api-services-sqladmin:jar:v1beta4-rev25-1.22.0 in the shaded jar.
$ gcloud dataproc jobs submit spark \
--cluster=<CLUSTER_NAME> \
--jars=gs://<BUCKET>/<DIRECTORIES>/<UBER_JAR_NAME> \
--class=<MAIN_CLASS> \
--properties=spark.driver.extraClassPath=<UBER_JAR_NAME>
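Note that spark.driver.extraClassPath is given just the jar's file name here; this appears to rely on Dataproc staging jars passed via --jars into the job's working directory on the driver, so the bare name resolves there. If the jar lives anywhere else on the master node, the full path is needed instead.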
$ gcloud dataproc clusters create <CLUSTER_NAME> \
    --initialization-actions gs://dataproc-initialization-actions/connectors/connectors.sh \
    --scopes=cloud-platform \
    --metadata 'gcs-connector-version=1.8.1' \
    --metadata 'bigquery-connector-version=0.12.1'
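Since --scopes=cloud-platform already subsumes the SQL Admin scope, a 403 from the SQL Admin API on a cluster created this way usually points at missing IAM permissions on the cluster's service account rather than at a missing scope.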
2019-04-01 11:05:03.998 IST
Something unusual has occurred to cause the driver to fail. Please report this exception.
at org.postgresql.Driver.connect(Driver.java:277)
at java.sql.DriverManager.getConnection(DriverManager.java:664)
at java.sql.DriverManager.getConnection(DriverManager.java:270)
at rdsConnector$.getConnection(rdsConnector.scala:33)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$4.run(ApplicationMaster.scala:721)
Caused by: java.lang.RuntimeException: Unable to retrieve information about Cloud SQL instance [gold-bruin-220416:us-east1:pii-isolation]
at com.google.cloud.sql.core.SslSocketFactory.obtainInstanceMetadata(SslSocketFactory.java:459)
at com.google.cloud.sql.core.SslSocketFactory.fetchInstanceSslInfo(SslSocketFactory.java:333)
at com.google.cloud.sql.core.SslSocketFactory.getInstanceSslInfo(SslSocketFactory.java:313)
at com.google.cloud.sql.core.SslSocketFactory.createAndConfigureSocket(SslSocketFactory.java:194)
at com.google.cloud.sql.core.SslSocketFactory.create(SslSocketFactory.java:160)
at com.google.cloud.sql.postgres.SocketFactory.createSocket(SocketFactory.java:96)
at org.postgresql.core.PGStream.<init>(PGStream.java:62)
at org.postgresql.core.v3.ConnectionFactoryImpl.tryConnect(ConnectionFactoryImpl.java:91)
at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:192)
at org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:49)
at org.postgresql.jdbc.PgConnection.<init>(PgConnection.java:195)
at org.postgresql.Driver.makeConnection(Driver.java:454)
at org.postgresql.Driver.connect(Driver.java:256)
... 11 more
Caused by: com.google.api.client.googleapis.json.GoogleJsonResponseException: 403 Forbidden
2019-04-01 11:05:03.000 IST
User class threw exception: org.postgresql.util.PSQLException: Something unusual has occurred to cause the driver to fail. Please report this exception.
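The 403 is raised while the socket factory calls the Cloud SQL Admin API to fetch instance metadata (SslSocketFactory.obtainInstanceMetadata), which fails when the cluster VMs lack the SQL Admin auth scope or their service account lacks Cloud SQL IAM permissions. The cluster here was created programmatically: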
def create_cluster(self):
    print('Creating cluster...')
    print('dataproc', self.dataproc)
    # dataproc = get_client()
    zone_uri = \
        'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
            self.project_id, self.zone)
    cluster_data = {
        'projectId': self.project_id,
        'clusterName': self.cluster_name,
        'scopes': 'sql-admin',
        'config': {
            'gceClusterConfig': {
                'zoneUri': zone_uri
            },
            'masterConfig': {
                'numInstances': 1,
                'machineTypeUri': 'n1-standard-1'
            },
            'workerConfig': {
                'numInstances': 2,
                'machineTypeUri': 'n1-standard-1'
            }
        }
        # cluster_data['config']['lifecycleConfig']['idleDeleteTtl'] = '300s'
    }
    result = self.dataproc.projects().regions().clusters().create(
        projectId=self.project_id,
        region=self.cluster_region,
        body=cluster_data).execute()
    print("Cluster Created!!!")
    return result
com.google.api.client.googleapis.json.GoogleJsonResponseException: 403 Forbidden
Can you ensure that the Cloud SQL API auth scope is enabled on the Compute Engine instances backing your Dataproc cluster?
Further to this, can you ensure that the service account used by the Dataproc compute instances has the required Cloud SQL IAM permissions?
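In the create_cluster() code above, 'scopes': 'sql-admin' sits at the top level of the cluster body, where the Dataproc clusters.create API does not recognize it, so the scope was never applied. A minimal sketch of the fix, assuming the same googleapiclient-based method, moves the scope into config.gceClusterConfig.serviceAccountScopes as a full URI:

# Sketch of the corrected request body for create_cluster() above.
# 'scopes' is not a valid top-level field; scopes belong under
# config.gceClusterConfig.serviceAccountScopes as full URIs.
cluster_data = {
    'projectId': self.project_id,
    'clusterName': self.cluster_name,
    'config': {
        'gceClusterConfig': {
            'zoneUri': zone_uri,
            # Grants the cluster VMs access to the Cloud SQL Admin API.
            'serviceAccountScopes': [
                'https://www.googleapis.com/auth/sqlservice.admin'
            ]
        },
        'masterConfig': {
            'numInstances': 1,
            'machineTypeUri': 'n1-standard-1'
        },
        'workerConfig': {
            'numInstances': 2,
            'machineTypeUri': 'n1-standard-1'
        }
    }
}

Alternatively, creating the cluster with gcloud and --scopes=sql-admin (or --scopes=cloud-platform, as in the command earlier in this post) requests the same scope.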