Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
<module>zeppelin-jupyter-interpreter-shaded</module>
<module>groovy</module>
<module>spark</module>
<module>spark-connect</module>
<module>spark-submit</module>
<module>submarine</module>
<module>markdown</module>
Expand Down
44 changes: 35 additions & 9 deletions scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,55 @@ channels:
- defaults
dependencies:
- python >=3.9,<3.10
- pyspark=3.3.2
- pyspark=3.5
- pycodestyle
- scipy
# --- Core data libraries ---
- pandas
- numpy
- scipy
- pyarrow
# --- Spark Connect protocol ---
- grpcio
- protobuf
# --- HTTP / networking ---
- requests
- urllib3
# --- File format support ---
- openpyxl
- xlrd
- pyyaml
- tabulate
# --- GCP access ---
- google-cloud-storage
- google-auth
- gcsfs
# --- Visualization ---
- matplotlib
- seaborn
- plotly
- plotnine
- altair
- vega_datasets
- hvplot
# --- SQL on pandas ---
- pandasql
# --- ML ---
- scikit-learn
- xgboost
# --- IPython / kernel ---
- ipython
- ipykernel
- jupyter_client
- hvplot
- plotnine
- seaborn
# --- Data connectors ---
- intake
- intake-parquet
- intake-xarray
- altair
- vega_datasets
- plotly
# --- pip-only packages ---
- pip
- pip:
# works for regular pip packages
- bkzep==0.6.1
- delta-spark==3.2.1
# --- R support ---
- r-base=3
- r-data.table
- r-evaluate
Expand Down
130 changes: 130 additions & 0 deletions spark-connect/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<artifactId>zeppelin-interpreter-parent</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.11.2</version>
<relativePath>../zeppelin-interpreter-parent/pom.xml</relativePath>
</parent>

<artifactId>spark-connect-interpreter</artifactId>
<packaging>jar</packaging>
<name>Zeppelin: Spark Connect Interpreter</name>
<description>Zeppelin Spark Connect support via gRPC client</description>

<!-- interpreter.name determines the directory the interpreter is installed under. -->
<properties>
<interpreter.name>spark-connect</interpreter.name>
<spark.connect.version>3.5.3</spark.connect.version>
<spark.scala.binary.version>2.12</spark.scala.binary.version>
</properties>

<dependencies>
<!-- gRPC-based Spark Connect JVM client; no full Spark runtime needed on the interpreter side. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-connect-client-jvm_${spark.scala.binary.version}</artifactId>
<version>${spark.connect.version}</version>
</dependency>

<!-- Provides the IPython interpreter infrastructure reused by IPySparkConnectInterpreter. -->
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-python</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<!-- Shade and relocate netty/grpc/protobuf so the client's copies cannot clash
with the versions shipped by Zeppelin or a co-located Spark distribution. -->
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<!-- Strip jar signatures; they become invalid after shading. -->
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
</transformers>
<artifactSet>
<excludes>
<exclude>org.apache.zeppelin:zeppelin-interpreter-shaded</exclude>
</excludes>
</artifactSet>
<relocations>
<relocation>
<pattern>io.netty</pattern>
<shadedPattern>org.apache.zeppelin.spark.connect.io.netty</shadedPattern>
</relocation>
<relocation>
<pattern>com.google</pattern>
<shadedPattern>org.apache.zeppelin.spark.connect.com.google</shadedPattern>
</relocation>
<relocation>
<pattern>io.grpc</pattern>
<shadedPattern>org.apache.zeppelin.spark.connect.io.grpc</shadedPattern>
</relocation>
</relocations>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
</plugin>
</plugins>
</build>

<!-- Default profile pins the Spark Connect client line; additional profiles can
override spark.connect.version for other Spark releases. -->
<profiles>
<profile>
<id>spark-connect-3.5</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.connect.version>3.5.3</spark.connect.version>
</properties>
</profile>
</profiles>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.zeppelin.spark;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.python.IPythonInterpreter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Properties;

/**
* PySpark Connect Interpreter which uses IPython underlying.
* Reuses the Java SparkSession from SparkConnectInterpreter via Py4j.
*/
public class IPySparkConnectInterpreter extends IPythonInterpreter {

private static final Logger LOGGER = LoggerFactory.getLogger(IPySparkConnectInterpreter.class);

// Sibling interpreters in the same session; assigned in open(), cleared in close().
private SparkConnectInterpreter sparkConnectInterpreter;
private PySparkConnectInterpreter pySparkConnectInterpreter;
// Guards against double-initialization; open() is a no-op once this is true.
private boolean opened = false;
// Context of the paragraph currently being interpreted; restored on the JVM
// side when Python calls back via setInterpreterContextInPython().
private InterpreterContext curIntpContext;

public IPySparkConnectInterpreter(Properties property) {
super(property);
}

@Override
public synchronized void open() throws InterpreterException {
if (opened) {
return;
}

this.sparkConnectInterpreter =
getInterpreterInTheSameSessionByClassName(SparkConnectInterpreter.class);
this.pySparkConnectInterpreter =
getInterpreterInTheSameSessionByClassName(PySparkConnectInterpreter.class, false);

sparkConnectInterpreter.open();

setProperty("zeppelin.python", pySparkConnectInterpreter.getPythonExec());
setUseBuiltinPy4j(true);
setAdditionalPythonInitFile("python/zeppelin_isparkconnect.py");
super.open();
opened = true;
Comment on lines +54 to +65
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Rollback the Spark Connect session if IPython startup fails.

After Line 59 succeeds, any exception from Python setup or super.open() leaves sparkConnectInterpreter running even though this wrapper never finished opening.

Suggested fix
   `@Override`
   public synchronized void open() throws InterpreterException {
     if (opened) {
       return;
     }
 
-    this.sparkConnectInterpreter =
-        getInterpreterInTheSameSessionByClassName(SparkConnectInterpreter.class);
-    this.pySparkConnectInterpreter =
-        getInterpreterInTheSameSessionByClassName(PySparkConnectInterpreter.class, false);
-
-    sparkConnectInterpreter.open();
-
-    setProperty("zeppelin.python", pySparkConnectInterpreter.getPythonExec());
-    setUseBuiltinPy4j(true);
-    setAdditionalPythonInitFile("python/zeppelin_isparkconnect.py");
-    super.open();
-    opened = true;
+    try {
+      this.sparkConnectInterpreter =
+          getInterpreterInTheSameSessionByClassName(SparkConnectInterpreter.class);
+      this.pySparkConnectInterpreter =
+          getInterpreterInTheSameSessionByClassName(PySparkConnectInterpreter.class, false);
+
+      sparkConnectInterpreter.open();
+
+      setProperty("zeppelin.python", pySparkConnectInterpreter.getPythonExec());
+      setUseBuiltinPy4j(true);
+      setAdditionalPythonInitFile("python/zeppelin_isparkconnect.py");
+      super.open();
+      opened = true;
+    } catch (InterpreterException | RuntimeException e) {
+      try {
+        if (sparkConnectInterpreter != null) {
+          sparkConnectInterpreter.close();
+        }
+      } catch (InterpreterException closeError) {
+        LOGGER.warn("Error rolling back Spark Connect interpreter after open failure",
+            closeError);
+      }
+      sparkConnectInterpreter = null;
+      pySparkConnectInterpreter = null;
+      throw e;
+    }
   }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@spark-connect/src/main/java/org/apache/zeppelin/spark/IPySparkConnectInterpreter.java`
around lines 54 - 65, After opening sparkConnectInterpreter, ensure you roll
back/close it if any following Python setup or super.open() fails: wrap the
calls that use pySparkConnectInterpreter (setProperty("zeppelin.python", ...),
setUseBuiltinPy4j(true), setAdditionalPythonInitFile(...)) and super.open() in a
try/catch (or try/finally) and on any exception call
sparkConnectInterpreter.close() (and set opened=false if you set it before)
before rethrowing the exception; reference the existing symbols
sparkConnectInterpreter, pySparkConnectInterpreter, setProperty,
setUseBuiltinPy4j, setAdditionalPythonInitFile, super.open, and opened to locate
where to add the rollback.

}

/**
 * Interprets PySpark code through IPython, first pushing the current
 * InterpreterContext into the Python process so callbacks from Python see
 * the right paragraph context.
 */
@Override
public org.apache.zeppelin.interpreter.InterpreterResult interpret(String st,
    InterpreterContext context) throws InterpreterException {
  this.curIntpContext = context;
  InterpreterContext.set(context);

  // Tell the Python side to pull curIntpContext before running user code.
  org.apache.zeppelin.interpreter.InterpreterResult contextResult =
      super.interpret("intp.setInterpreterContextInPython()", context);
  if (contextResult.code() == org.apache.zeppelin.interpreter.InterpreterResult.Code.ERROR) {
    return new org.apache.zeppelin.interpreter.InterpreterResult(
        org.apache.zeppelin.interpreter.InterpreterResult.Code.ERROR,
        "Fail to setCurIntpContext");
  }

  return super.interpret(st, context);
}

/**
 * Restores the current paragraph's InterpreterContext on the JVM thread.
 * Invoked from the Python side (presumably via the Py4j bridge as
 * {@code intp.setInterpreterContextInPython()} — see interpret()).
 */
public void setInterpreterContextInPython() {
InterpreterContext.set(curIntpContext);
}

/**
 * Returns the SparkSession held by the sibling SparkConnectInterpreter,
 * or null when that interpreter has not been opened yet.
 */
public SparkSession getSparkSession() {
  return sparkConnectInterpreter == null ? null : sparkConnectInterpreter.getSparkSession();
}

/**
 * Cancels the running paragraph: first interrupts the IPython kernel via
 * super.cancel(), then delegates to the Spark Connect interpreter so any
 * in-flight Spark work is cancelled as well.
 */
@Override
public void cancel(InterpreterContext context) throws InterpreterException {
super.cancel(context);
if (sparkConnectInterpreter != null) {
sparkConnectInterpreter.cancel(context);
}
}

@Override
public void close() throws InterpreterException {
LOGGER.info("Close IPySparkConnectInterpreter (opened={})", opened);
try {
super.close();
} finally {
opened = false;
sparkConnectInterpreter = null;
pySparkConnectInterpreter = null;
LOGGER.info("IPySparkConnectInterpreter closed and state reset — ready for re-open");
Comment on lines +105 to +113
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Close the underlying SparkConnectInterpreter here too.

open() explicitly opens sparkConnectInterpreter, but close() only tears down the IPython side. Reopening this interpreter can therefore reuse a stale Spark Connect session and keep the per-user session slot occupied.

Suggested fix
   `@Override`
   public void close() throws InterpreterException {
     LOGGER.info("Close IPySparkConnectInterpreter (opened={})", opened);
     try {
       super.close();
     } finally {
+      if (sparkConnectInterpreter != null) {
+        try {
+          sparkConnectInterpreter.close();
+        } catch (InterpreterException e) {
+          LOGGER.warn("Error closing SparkConnectInterpreter", e);
+        }
+      }
       opened = false;
+      curIntpContext = null;
       sparkConnectInterpreter = null;
       pySparkConnectInterpreter = null;
       LOGGER.info("IPySparkConnectInterpreter closed and state reset — ready for re-open");
     }
   }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
public void close() throws InterpreterException {
LOGGER.info("Close IPySparkConnectInterpreter (opened={})", opened);
try {
super.close();
} finally {
opened = false;
sparkConnectInterpreter = null;
pySparkConnectInterpreter = null;
LOGGER.info("IPySparkConnectInterpreter closed and state reset — ready for re-open");
public void close() throws InterpreterException {
LOGGER.info("Close IPySparkConnectInterpreter (opened={})", opened);
try {
super.close();
} finally {
if (sparkConnectInterpreter != null) {
try {
sparkConnectInterpreter.close();
} catch (InterpreterException e) {
LOGGER.warn("Error closing SparkConnectInterpreter", e);
}
}
opened = false;
curIntpContext = null;
sparkConnectInterpreter = null;
pySparkConnectInterpreter = null;
LOGGER.info("IPySparkConnectInterpreter closed and state reset — ready for re-open");
}
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@spark-connect/src/main/java/org/apache/zeppelin/spark/IPySparkConnectInterpreter.java`
around lines 105 - 113, The close() method currently resets IPySpark state but
does not close the underlying SparkConnectInterpreter, causing stale sessions;
update IPySparkConnectInterpreter.close() to check if sparkConnectInterpreter is
non-null and call its close() (handling/propagating InterpreterException as
appropriate) before nullifying it, ensuring you call super.close() and then
close sparkConnectInterpreter (or close it inside the try/finally) so the
underlying SparkConnectInterpreter is properly closed and its session slot
released.

}
}

/**
 * Progress is not tracked for Spark Connect executions; always reports 0.
 */
@Override
public int getProgress(InterpreterContext context) throws InterpreterException {
return 0;
}

/**
 * Maximum number of rows to display, delegated to the Spark Connect
 * interpreter; falls back to 1000 when it is not available.
 */
public int getMaxResult() {
  return sparkConnectInterpreter == null ? 1000 : sparkConnectInterpreter.getMaxResult();
}

/**
 * Renders a DataFrame as a Zeppelin table string, truncated to maxResult
 * rows. The parameter is typed Object (presumably because it is invoked
 * from the Python side over the bridge — confirm against callers); the
 * argument must actually be a {@code Dataset<Row>} or the cast will fail
 * at runtime with a ClassCastException.
 */
@SuppressWarnings("unchecked")
public String formatDataFrame(Object df, int maxResult) {
return SparkConnectUtils.showDataFrame((Dataset<Row>) df, maxResult);
}
}
Loading
Loading