Skip to content

Commit

Permalink
fix: pyspark lazy load
Browse files Browse the repository at this point in the history
  • Loading branch information
Aries-ckt committed Sep 27, 2023
1 parent 1d3ea2a commit 92c25fe
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions pilot/connections/conn_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@ class SparkConnect(BaseConnect):
driver: str = "spark"
"""db dialect"""
dialect: str = "sparksql"
from pyspark.sql import SparkSession, DataFrame

def __init__(
self,
file_path: str,
spark_session: Optional[SparkSession] = None,
spark_session: Optional = None,
engine_args: Optional[dict] = None,
**kwargs: Any,
) -> None:
"""Initialize the Spark DataFrame from Datasource path
return: Spark DataFrame
"""
from pyspark.sql import SparkSession

self.spark_session = (
spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
)
Expand All @@ -47,7 +47,7 @@ def from_file_path(
except Exception as e:
print("load spark datasource error" + str(e))

def create_df(self, path) -> DataFrame:
def create_df(self, path):
"""Create a Spark DataFrame from Datasource path(now support parquet, jdbc, orc, libsvm, csv, text, json.).
return: Spark DataFrame
reference: https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html
Expand Down

0 comments on commit 92c25fe

Please sign in to comment.