Following the steps, I got my cluster working, but when I run the example from the Jupyter notebook, I get an error.
Below is the code:
from __future__ import print_function
import sys
from random import random
from operator import add
import os

from pyspark.sql import SparkSession

spark = SparkSession \
    .builder \
    .appName("PythonPi") \
    .config("spark.app.name", "spark-pi") \
    .config("spark.executor.instances", "2") \
    .getOrCreate()

partitions = 2
n = 100000 * partitions

def f(_):
    # Sample a random point in the unit square; count it if it falls
    # inside the unit circle (Monte Carlo estimate of pi).
    x = random() * 2 - 1
    y = random() * 2 - 1
    return 1 if x ** 2 + y ** 2 <= 1 else 0

count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
print("Pi is roughly %f" % (4.0 * count / n))
The error is below:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-2-b68505a94fe2> in <module>()
8
9
---> 10 spark = SparkSession .builder .appName("PythonPi") .config("spark.app.name", "spark-pi") .config("spark.executor.instances", "2") .getOrCreate()
11 partitions = 2
12 n = 100000 * partitions
/opt/spark/python/pyspark/sql/session.py in getOrCreate(self)
167 for key, value in self._options.items():
168 sparkConf.set(key, value)
--> 169 sc = SparkContext.getOrCreate(sparkConf)
170 # This SparkContext may be an existing one.
171 for key, value in self._options.items():
/opt/spark/python/pyspark/context.py in getOrCreate(cls, conf)
332 with SparkContext._lock:
333 if SparkContext._active_spark_context is None:
--> 334 SparkContext(conf=conf or SparkConf())
335 return SparkContext._active_spark_context
336
/opt/spark/python/pyspark/context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
116 try:
117 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
--> 118 conf, jsc, profiler_cls)
119 except:
120 # If an error occurs, clean up in order to allow future SparkContext creation:
/opt/spark/python/pyspark/context.py in _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
178
179 # Create the Java SparkContext through Py4J
--> 180 self._jsc = jsc or self._initialize_context(self._conf._jconf)
181 # Reset the SparkConf to the one actually used by the SparkContext in JVM.
182 self._conf = SparkConf(_jconf=self._jsc.sc().conf())
/opt/spark/python/pyspark/context.py in _initialize_context(self, jconf)
271 Initialize SparkContext in function to allow subclass specific initialization
272 """
--> 273 return self._jvm.JavaSparkContext(jconf)
274
275 @classmethod
/opt/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1399 answer = self._gateway_client.send_command(command)
1400 return_value = get_return_value(
-> 1401 answer, self._gateway_client, None, self._fqn)
1402
1403 for temp_arg in temp_args:
/opt/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
317 raise Py4JJavaError(
318 "An error occurred while calling {0}{1}{2}.\n".
--> 319 format(target_id, ".", name), value)
320 else:
321 raise Py4JError(
Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: org.apache.spark.SparkException: Executor cannot find driver pod
at org.apache.spark.scheduler.cluster.k8s.KubernetesClusterSchedulerBackend.liftedTree1$1(KubernetesClusterSchedulerBackend.scala:78)
at org.apache.spark.scheduler.cluster.k8s.KubernetesClusterSchedulerBackend.<init>(KubernetesClusterSchedulerBackend.scala:72)
at org.apache.spark.scheduler.cluster.k8s.KubernetesClusterManager.createSchedulerBackend(KubernetesClusterManager.scala:191)
at org.apache.spark.SparkContext$.org$apache$spark$SparkContext$$createTaskScheduler(SparkContext.scala:2764)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:501)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:236)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: io.fabric8.kubernetes.client.KubernetesClientException: Failure executing: GET at: https://kubernetes.default.svc/api/v1/namespaces/default/pods/spark-all-jupyter-569c6f4cdc-596fj. Message: Forbidden!Configured service account doesn't have access. Service account may have been revoked. pods "spark-all-jupyter-569c6f4cdc-596fj" is forbidden: User "system:serviceaccount:default:default" cannot get resource "pods" in API group "" in the namespace "default".
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.requestFailure(OperationSupport.java:470)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.assertResponseCode(OperationSupport.java:407)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleResponse(OperationSupport.java:379)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleResponse(OperationSupport.java:343)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleGet(OperationSupport.java:312)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleGet(OperationSupport.java:295)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.handleGet(BaseOperation.java:783)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.getMandatory(BaseOperation.java:217)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.get(BaseOperation.java:184)
at org.apache.spark.scheduler.cluster.k8s.KubernetesClusterSchedulerBackend.liftedTree1$1(KubernetesClusterSchedulerBackend.scala:74)
... 16 more
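The root cause is at the bottom of the traceback: the driver is running under the default service account in the default namespace, which is not allowed to GET pods, so the Kubernetes scheduler backend cannot look up its own driver pod. A minimal sketch of one common fix, assuming you can create RBAC objects in the cluster (the names spark and spark-role are placeholders, not anything from this setup):

    kubectl create serviceaccount spark --namespace=default
    kubectl create clusterrolebinding spark-role --clusterrole=edit \
        --serviceaccount=default:spark --namespace=default

Then make the driver actually use that account, for example by setting serviceAccountName: spark in the Jupyter deployment's pod spec, or by adding .config("spark.kubernetes.authenticate.driver.serviceAccountName", "spark") to the SparkSession builder. This mirrors the RBAC example in Spark's "Running on Kubernetes" documentation; the edit ClusterRole is broader than strictly necessary, so a namespaced Role granting only pod get/list/watch/create/delete would be tighter.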