Hi!
Why can't specifying the PySpark version via an environment variable be optional?
like this:
import os
import pyspark
os.environ['SPARK_VERSION'] = str(pyspark.__version__)
or
from functools import lru_cache
import os

import pyspark


@lru_cache(maxsize=None)
def _get_spark_version() -> str:
    # SPARK_VERSION stays supported but becomes optional:
    # fall back to the installed pyspark version when it is not set.
    # (os.environ.get never raises KeyError, it returns None, so the old
    # try/except around it was dead code and is dropped here.)
    spark_version = os.environ.get("SPARK_VERSION")
    if not spark_version:
        spark_version = str(pyspark.__version__)
    return _extract_major_minor_versions(spark_version)
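With that change the environment variable still takes precedence when it is set. A minimal sketch of the expected behaviour (the printed values are placeholders and depend on the installed pyspark; _get_spark_version is the function above):

import os

# Nothing set in the environment: the installed pyspark version is used.
os.environ.pop("SPARK_VERSION", None)
print(_get_spark_version())  # e.g. "3.3" for pyspark 3.3.x

# An explicit SPARK_VERSION still wins.
os.environ["SPARK_VERSION"] = "3.1.2"
_get_spark_version.cache_clear()  # the result is memoized by lru_cache, so clear it
print(_get_spark_version())  # "3.1"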
And the Spark session could also default to the active session ...
like this:
from typing import Optional

from pyspark.sql import SparkSession


class VerificationResult:
    """The results returned from the VerificationSuite.

    :param verificationRun: the verification run, i.e. the result of run()
    """

    def __init__(self, spark_session: Optional[SparkSession], verificationRun):
        self._spark_session = self._setup_spark_session(spark_session)
        self.verificationRun = verificationRun

    def _setup_spark_session(self, session: Optional[SparkSession] = None) -> SparkSession:
        # Use an explicitly passed session if given, otherwise fall back
        # to the currently active session.
        if session:
            return session
        potential_session = SparkSession.getActiveSession()
        if potential_session:
            return potential_session
        msg = "Spark session not found, init with `VerificationResult(my_session, ...)`"
        raise AttributeError(msg)
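That would make the session argument effectively optional. A minimal usage sketch (my_verification_run is a hypothetical placeholder for whatever run() returns):

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").getOrCreate()

# No session is passed explicitly, so the active one is picked up.
result = VerificationResult(None, my_verification_run)  # my_verification_run: hypothetical run result
assert result._spark_session is spark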
Maybe I can do this and create a PR?