From 6cbdeda881949dffc42fceaed654d986888d1389 Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Sun, 6 Mar 2022 03:07:28 -0500 Subject: [PATCH 1/2] fix: Fix default feast apply path without any extras Signed-off-by: Danny Chiao --- docs/reference/data-sources/spark.md | 12 +++++++++--- sdk/python/feast/__init__.py | 4 ---- sdk/python/feast/inference.py | 7 ++++--- sdk/python/setup.py | 7 +------ 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/docs/reference/data-sources/spark.md b/docs/reference/data-sources/spark.md index 25b69c7355e..d0bc495924c 100644 --- a/docs/reference/data-sources/spark.md +++ b/docs/reference/data-sources/spark.md @@ -13,7 +13,9 @@ The spark data source API allows for the retrieval of historical feature values Using a table reference from SparkSession(for example, either in memory or a Hive Metastore) ```python -from feast import SparkSource +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) my_spark_source = SparkSource( table="FEATURE_TABLE", @@ -23,7 +25,9 @@ my_spark_source = SparkSource( Using a query ```python -from feast import SparkSource +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) my_spark_source = SparkSource( query="SELECT timestamp as ts, created, f1, f2 " @@ -34,7 +38,9 @@ my_spark_source = SparkSource( Using a file reference ```python -from feast import SparkSource +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) my_spark_source = SparkSource( path=f"{CURRENT_DIR}/data/driver_hourly_stats", diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index 0af226aa056..83b504b0cb9 100644 --- a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -3,9 +3,6 @@ from pkg_resources import DistributionNotFound, get_distribution from feast.infra.offline_stores.bigquery_source import BigQuerySource -from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( - SparkSource, -) from feast.infra.offline_stores.file_source import FileSource from feast.infra.offline_stores.redshift_source import RedshiftSource from feast.infra.offline_stores.snowflake_source import SnowflakeSource @@ -50,5 +47,4 @@ "RedshiftSource", "RequestFeatureView", "SnowflakeSource", - "SparkSource", ] diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index d233631d3da..b3e51b48162 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -8,7 +8,6 @@ FileSource, RedshiftSource, SnowflakeSource, - SparkSource, ) from feast.data_source import DataSource, RequestDataSource from feast.errors import RegistryInferenceFailure @@ -87,8 +86,10 @@ def update_data_sources_with_inferred_event_timestamp_col( ): # prepare right match pattern for data source ts_column_type_regex_pattern = "" - if isinstance(data_source, FileSource) or isinstance( - data_source, SparkSource + # TODO(adchia): Move Spark source inference out of this logic + if ( + isinstance(data_source, FileSource) + or "SparkSource" == data_source.__class__.__name__ ): ts_column_type_regex_pattern = r"^timestamp" elif isinstance(data_source, BigQuerySource): diff --git a/sdk/python/setup.py b/sdk/python/setup.py index f95dd2b806b..cecc04a0546 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -46,6 +46,7 @@ "fastavro>=1.1.0", "google-api-core>=1.23.0", "googleapis-common-protos==1.52.*", + "great_expectations>=0.14.0,<0.15.0", "grpcio>=1.34.0", "grpcio-reflection>=1.34.0", "Jinja2>=2.0.0", @@ -96,10 +97,6 @@ "pyspark>=3.0.0", ] -GE_REQUIRED = [ - "great_expectations>=0.14.0,<0.15.0" -] - CI_REQUIRED = ( [ "cryptography==3.3.2", @@ -146,7 +143,6 @@ + AWS_REQUIRED + SNOWFLAKE_REQUIRED + SPARK_REQUIRED - + GE_REQUIRED ) DEV_REQUIRED = ["mypy-protobuf>=3.1.0", "grpcio-testing==1.*"] + CI_REQUIRED @@ -250,7 +246,6 @@ def run(self): "redis": REDIS_REQUIRED, "snowflake": SNOWFLAKE_REQUIRED, "spark": SPARK_REQUIRED, - "ge": GE_REQUIRED, }, include_package_data=True, license="Apache", From f8579a9393c873948a6f4bcecc6a9822771bbd69 Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Sun, 6 Mar 2022 03:30:45 -0500 Subject: [PATCH 2/2] revert removing ge Signed-off-by: Danny Chiao --- sdk/python/feast/feature_store.py | 2 -- sdk/python/setup.py | 7 ++++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 89e4df1d5fc..19741bcf127 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -42,7 +42,6 @@ from feast.data_source import DataSource from feast.diff.infra_diff import InfraDiff, diff_infra_protos from feast.diff.registry_diff import RegistryDiff, apply_diff_to_registry, diff_between -from feast.dqm.profilers.ge_profiler import GEProfiler from feast.entity import Entity from feast.errors import ( EntityNotFoundException, @@ -881,7 +880,6 @@ def create_saved_dataset( storage: SavedDatasetStorage, tags: Optional[Dict[str, str]] = None, feature_service: Optional[FeatureService] = None, - profiler: Optional[GEProfiler] = None, ) -> SavedDataset: """ Execute provided retrieval job and persist its outcome in given storage. diff --git a/sdk/python/setup.py b/sdk/python/setup.py index cecc04a0546..f95dd2b806b 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -46,7 +46,6 @@ "fastavro>=1.1.0", "google-api-core>=1.23.0", "googleapis-common-protos==1.52.*", - "great_expectations>=0.14.0,<0.15.0", "grpcio>=1.34.0", "grpcio-reflection>=1.34.0", "Jinja2>=2.0.0", @@ -97,6 +96,10 @@ "pyspark>=3.0.0", ] +GE_REQUIRED = [ + "great_expectations>=0.14.0,<0.15.0" +] + CI_REQUIRED = ( [ "cryptography==3.3.2", @@ -143,6 +146,7 @@ + AWS_REQUIRED + SNOWFLAKE_REQUIRED + SPARK_REQUIRED + + GE_REQUIRED ) DEV_REQUIRED = ["mypy-protobuf>=3.1.0", "grpcio-testing==1.*"] + CI_REQUIRED @@ -246,6 +250,7 @@ def run(self): "redis": REDIS_REQUIRED, "snowflake": SNOWFLAKE_REQUIRED, "spark": SPARK_REQUIRED, + "ge": GE_REQUIRED, }, include_package_data=True, license="Apache",