From 68f6e5e738f82876ada2dd7688d011765a88057e Mon Sep 17 00:00:00 2001 From: Shai Bruhis Date: Tue, 8 Mar 2022 16:19:10 -0800 Subject: [PATCH 1/8] added Redshift typecheck to data_source event_timestamp_col inference Signed-off-by: Shai Bruhis --- sdk/python/feast/inference.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index b3e51b48162..f2d4696fe17 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -45,7 +45,7 @@ def update_entities_with_inferred_types_from_feature_views( # get entity information from information extracted from the view batch source extracted_entity_name_type_pairs = list( - filter(lambda tup: tup[0] == entity.join_key, col_names_and_types,) + filter(lambda tup: tup[0] == entity.join_key,col_names_and_types,) ) if len(extracted_entity_name_type_pairs) == 0: # Doesn't mention inference error because would also be an error without inferencing @@ -54,8 +54,8 @@ def update_entities_with_inferred_types_from_feature_views( its entity's name.""" ) - inferred_value_type = view.batch_source.source_datatype_to_feast_value_type()( - extracted_entity_name_type_pairs[0][1] + inferred_value_type = ( + view.batch_source.source_datatype_to_feast_value_type()(extracted_entity_name_type_pairs[0][1]) ) if ( @@ -111,6 +111,7 @@ def update_data_sources_with_inferred_event_timestamp_col( assert ( isinstance(data_source, FileSource) or isinstance(data_source, BigQuerySource) + or isinstance(data_source, RedshiftSource) or isinstance(data_source, SnowflakeSource) ) From 7f1f15f629f0d41f62c4de7bdf5eb68ba1e8e8df Mon Sep 17 00:00:00 2001 From: Shai Bruhis Date: Wed, 9 Mar 2022 12:03:20 -0800 Subject: [PATCH 2/8] address comments Signed-off-by: Shai Bruhis --- sdk/python/feast/inference.py | 1 + .../integration/registration/test_inference.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index f2d4696fe17..f082ed6b625 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -113,6 +113,7 @@ def update_data_sources_with_inferred_event_timestamp_col( or isinstance(data_source, BigQuerySource) or isinstance(data_source, RedshiftSource) or isinstance(data_source, SnowflakeSource) + or "SparkSource" == data_source.__class__.__name__ ) # loop through table columns to find singular match diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 2582e69ea37..9d0feee5998 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -111,7 +111,9 @@ def test_infer_datasource_names_dwh(): @pytest.mark.integration -def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1): +def test_update_data_sources_with_inferred_event_timestamp_col( + simple_dataset_1, universal_data_sources +): df_with_two_viable_timestamp_cols = simple_dataset_1.copy(deep=True) df_with_two_viable_timestamp_cols["ts_2"] = simple_dataset_1["ts_1"] @@ -137,6 +139,16 @@ def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1) [file_source], RepoConfig(provider="local", project="test") ) + (_, _, data_sources) = universal_data_sources + update_data_sources_with_inferred_event_timestamp_col( + data_sources, RepoConfig(provider="local", project="test") + ) + actual_event_timestamp_cols = [ + source.event_timestamp_column for source in data_sources + ] + + assert actual_event_timestamp_cols == ["event_timestamp" * len(data_sources)] + def test_on_demand_features_type_inference(): # Create Feature Views From f6e59074bc691f6f4cd704587d4930aac27c975e Mon Sep 17 00:00:00 2001 From: Shai Bruhis Date: Wed, 9 Mar 2022 12:23:11 -0800 Subject: [PATCH 3/8] moved non file data sources into their own test Signed-off-by: Shai Bruhis --- sdk/python/tests/integration/registration/test_inference.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 9d0feee5998..7d9ff4b6351 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -111,8 +111,8 @@ def test_infer_datasource_names_dwh(): @pytest.mark.integration -def test_update_data_sources_with_inferred_event_timestamp_col( - simple_dataset_1, universal_data_sources +def test_update_file_data_source_with_inferred_event_timestamp_col( + simple_dataset_1 ): df_with_two_viable_timestamp_cols = simple_dataset_1.copy(deep=True) df_with_two_viable_timestamp_cols["ts_2"] = simple_dataset_1["ts_1"] @@ -139,6 +139,8 @@ def test_update_data_sources_with_inferred_event_timestamp_col( [file_source], RepoConfig(provider="local", project="test") ) +@pytest.mark.universal +def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_sources): (_, _, data_sources) = universal_data_sources update_data_sources_with_inferred_event_timestamp_col( data_sources, RepoConfig(provider="local", project="test") From ca788ced1f9fbe3254d0f514303cdafe487e13ff Mon Sep 17 00:00:00 2001 From: Shai Bruhis Date: Wed, 9 Mar 2022 12:28:35 -0800 Subject: [PATCH 4/8] addressed comments Signed-off-by: Shai Bruhis --- sdk/python/feast/inference.py | 6 +++--- sdk/python/tests/integration/registration/test_inference.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index f082ed6b625..51c4e9d78ec 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -45,7 +45,7 @@ def update_entities_with_inferred_types_from_feature_views( # get entity information from information extracted from the view batch source extracted_entity_name_type_pairs = list( - filter(lambda tup: tup[0] == entity.join_key,col_names_and_types,) + filter(lambda tup: tup[0] == entity.join_key, col_names_and_types,) ) if len(extracted_entity_name_type_pairs) == 0: # Doesn't mention inference error because would also be an error without inferencing @@ -54,8 +54,8 @@ def update_entities_with_inferred_types_from_feature_views( its entity's name.""" ) - inferred_value_type = ( - view.batch_source.source_datatype_to_feast_value_type()(extracted_entity_name_type_pairs[0][1]) + inferred_value_type = view.batch_source.source_datatype_to_feast_value_type()( + extracted_entity_name_type_pairs[0][1] ) if ( diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 7d9ff4b6351..a21ebe80598 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -111,9 +111,7 @@ def test_infer_datasource_names_dwh(): @pytest.mark.integration -def test_update_file_data_source_with_inferred_event_timestamp_col( - simple_dataset_1 -): +def test_update_file_data_source_with_inferred_event_timestamp_col(simple_dataset_1): df_with_two_viable_timestamp_cols = simple_dataset_1.copy(deep=True) df_with_two_viable_timestamp_cols["ts_2"] = simple_dataset_1["ts_1"] @@ -139,6 +137,8 @@ def test_update_file_data_source_with_inferred_event_timestamp_col( [file_source], RepoConfig(provider="local", project="test") ) + +@pytest.mark.integration @pytest.mark.universal def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_sources): (_, _, data_sources) = universal_data_sources From d6703b9db2a5a88e7ae117bc92de7f720625abe8 Mon Sep 17 00:00:00 2001 From: Shai Bruhis Date: Wed, 9 Mar 2022 16:52:26 -0800 Subject: [PATCH 5/8] fixed texts Signed-off-by: Shai Bruhis --- sdk/python/tests/integration/registration/test_inference.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index a21ebe80598..a1002146a83 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -142,8 +142,10 @@ def test_update_file_data_source_with_inferred_event_timestamp_col(simple_datase @pytest.mark.universal def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_sources): (_, _, data_sources) = universal_data_sources + update_data_sources_with_inferred_event_timestamp_col( - data_sources, RepoConfig(provider="local", project="test") + universal_data_sources.asdict().values(), + RepoConfig(provider="local", project="test"), ) actual_event_timestamp_cols = [ source.event_timestamp_column for source in data_sources From ef17d06cedd8e7761c8b9eb0aa510067bcbc58a2 Mon Sep 17 00:00:00 2001 From: Shai Bruhis Date: Thu, 10 Mar 2022 10:48:47 -0800 Subject: [PATCH 6/8] remove previously defined event_timestamp_column from data_source to allow for inference Signed-off-by: Shai Bruhis --- .../integration/feature_repos/repo_configuration.py | 3 +++ .../tests/integration/registration/test_inference.py | 10 +++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 61cad5606f1..ef57977fcbe 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -203,6 +203,9 @@ class UniversalDataSources: global_ds: DataSource field_mapping: DataSource + def values(self): + return dataclasses.asdict(self).values() + def construct_universal_data_sources( datasets: UniversalDatasets, data_source_creator: DataSourceCreator diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index a1002146a83..d0f083c4bde 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -143,15 +143,19 @@ def test_update_file_data_source_with_inferred_event_timestamp_col(simple_datase def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_sources): (_, _, data_sources) = universal_data_sources + # remove defined event_timestamp_column to allow for inference + for data_source in data_sources.values(): + data_source.event_timestamp_column = None + update_data_sources_with_inferred_event_timestamp_col( - universal_data_sources.asdict().values(), + data_sources.values(), RepoConfig(provider="local", project="test"), ) actual_event_timestamp_cols = [ - source.event_timestamp_column for source in data_sources + source.event_timestamp_column for source in data_sources.values() ] - assert actual_event_timestamp_cols == ["event_timestamp" * len(data_sources)] + assert actual_event_timestamp_cols == ["event_timestamp"] * len(data_sources.values()) def test_on_demand_features_type_inference(): From 87981e8ec822ed027b1fe5ddf7dd147bc867c1e9 Mon Sep 17 00:00:00 2001 From: Shai Bruhis Date: Fri, 11 Mar 2022 23:11:14 -0800 Subject: [PATCH 7/8] made a deepcopy of data_sources to not affect other tests Signed-off-by: Shai Bruhis --- .../integration/registration/test_inference.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index d0f083c4bde..4adf4a5e8e9 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -1,3 +1,4 @@ +from copy import deepcopy import pandas as pd import pytest @@ -142,21 +143,23 @@ def test_update_file_data_source_with_inferred_event_timestamp_col(simple_datase @pytest.mark.universal def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_sources): (_, _, data_sources) = universal_data_sources + data_sources_copy = deepcopy(data_sources) # remove defined event_timestamp_column to allow for inference - for data_source in data_sources.values(): + for data_source in data_sources_copy.values(): data_source.event_timestamp_column = None update_data_sources_with_inferred_event_timestamp_col( - data_sources.values(), + data_sources_copy.values(), RepoConfig(provider="local", project="test"), ) actual_event_timestamp_cols = [ - source.event_timestamp_column for source in data_sources.values() + source.event_timestamp_column for source in data_sources_copy.values() ] - assert actual_event_timestamp_cols == ["event_timestamp"] * len(data_sources.values()) - + assert actual_event_timestamp_cols == ["event_timestamp"] * len( + data_sources_copy.values() + ) def test_on_demand_features_type_inference(): # Create Feature Views From 302e295fa76d5b69413ded79e0f0a02972f0529a Mon Sep 17 00:00:00 2001 From: Shai Bruhis Date: Tue, 15 Mar 2022 00:37:56 -0700 Subject: [PATCH 8/8] linter Signed-off-by: Shai Bruhis --- sdk/python/tests/integration/registration/test_inference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 4adf4a5e8e9..0ea6276669d 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -1,4 +1,5 @@ from copy import deepcopy + import pandas as pd import pytest @@ -150,8 +151,7 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so data_source.event_timestamp_column = None update_data_sources_with_inferred_event_timestamp_col( - data_sources_copy.values(), - RepoConfig(provider="local", project="test"), + data_sources_copy.values(), RepoConfig(provider="local", project="test"), ) actual_event_timestamp_cols = [ source.event_timestamp_column for source in data_sources_copy.values() @@ -161,6 +161,7 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so data_sources_copy.values() ) + def test_on_demand_features_type_inference(): # Create Feature Views date_request = RequestDataSource(