diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 73c2f14a638..b2a00e4a736 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -399,14 +399,14 @@ def delete_feature_view(self, name: str): @log_exceptions_and_usage def delete_feature_service(self, name: str): """ - Deletes a feature service. + Deletes a feature service. - Args: - name: Name of feature service. + Args: + name: Name of feature service. - Raises: - FeatureServiceNotFoundException: The feature view could not be found. - """ + Raises: + FeatureServiceNotFoundException: The feature view could not be found. + """ return self._registry.delete_feature_service(name, self.project) def _get_features( @@ -903,17 +903,17 @@ def create_saved_dataset( feature_service: Optional[FeatureService] = None, ) -> SavedDataset: """ - Execute provided retrieval job and persist its outcome in given storage. - Storage type (eg, BigQuery or Redshift) must be the same as globally configured offline store. - After data successfully persisted saved dataset object with dataset metadata is committed to the registry. - Name for the saved dataset should be unique within project, since it's possible to overwrite previously stored dataset - with the same name. + Execute provided retrieval job and persist its outcome in given storage. + Storage type (eg, BigQuery or Redshift) must be the same as globally configured offline store. + After data successfully persisted saved dataset object with dataset metadata is committed to the registry. + Name for the saved dataset should be unique within project, since it's possible to overwrite previously stored dataset + with the same name. - Returns: - SavedDataset object with attached RetrievalJob + Returns: + SavedDataset object with attached RetrievalJob - Raises: - ValueError if given retrieval job doesn't have metadata + Raises: + ValueError if given retrieval job doesn't have metadata """ warnings.warn( "Saving dataset is an experimental feature. " @@ -1589,11 +1589,11 @@ def _get_unique_entities( join_key_values: Dict[str, List[Value]], entity_name_to_join_key_map: Dict[str, str], ) -> Tuple[Tuple[Dict[str, Value], ...], Tuple[List[int], ...]]: - """ Return the set of unique composite Entities for a Feature View and the indexes at which they appear. + """Return the set of unique composite Entities for a Feature View and the indexes at which they appear. - This method allows us to query the OnlineStore for data we need only once - rather than requesting and processing data for the same combination of - Entities multiple times. + This method allows us to query the OnlineStore for data we need only once + rather than requesting and processing data for the same combination of + Entities multiple times. """ # Get the correct set of entity values with the correct join keys. table_entity_values = self._get_table_entity_values( @@ -1629,14 +1629,14 @@ def _read_from_online_store( requested_features: List[str], table: FeatureView, ) -> List[Tuple[List[Timestamp], List["FieldStatus.ValueType"], List[Value]]]: - """ Read and process data from the OnlineStore for a given FeatureView. + """Read and process data from the OnlineStore for a given FeatureView. - This method guarantees that the order of the data in each element of the - List returned is the same as the order of `requested_features`. + This method guarantees that the order of the data in each element of the + List returned is the same as the order of `requested_features`. - This method assumes that `provider.online_read` returns data for each - combination of Entities in `entity_rows` in the same order as they - are provided. + This method assumes that `provider.online_read` returns data for each + combination of Entities in `entity_rows` in the same order as they + are provided. """ # Instantiate one EntityKeyProto per Entity. entity_key_protos = [ @@ -1693,23 +1693,23 @@ def _populate_response_from_feature_data( requested_features: Iterable[str], table: FeatureView, ): - """ Populate the GetOnlineFeaturesResponse with feature data. - - This method assumes that `_read_from_online_store` returns data for each - combination of Entities in `entity_rows` in the same order as they - are provided. - - Args: - feature_data: A list of data in Protobuf form which was retrieved from the OnlineStore. - indexes: A list of indexes which should be the same length as `feature_data`. Each list - of indexes corresponds to a set of result rows in `online_features_response`. - online_features_response: The object to populate. - full_feature_names: A boolean that provides the option to add the feature view prefixes to the feature names, - changing them from the format "feature" to "feature_view__feature" (e.g., "daily_transactions" changes to - "customer_fv__daily_transactions"). - requested_features: The names of the features in `feature_data`. This should be ordered in the same way as the - data in `feature_data`. - table: The FeatureView that `feature_data` was retrieved from. + """Populate the GetOnlineFeaturesResponse with feature data. + + This method assumes that `_read_from_online_store` returns data for each + combination of Entities in `entity_rows` in the same order as they + are provided. + + Args: + feature_data: A list of data in Protobuf form which was retrieved from the OnlineStore. + indexes: A list of indexes which should be the same length as `feature_data`. Each list + of indexes corresponds to a set of result rows in `online_features_response`. + online_features_response: The object to populate. + full_feature_names: A boolean that provides the option to add the feature view prefixes to the feature names, + changing them from the format "feature" to "feature_view__feature" (e.g., "daily_transactions" changes to + "customer_fv__daily_transactions"). + requested_features: The names of the features in `feature_data`. This should be ordered in the same way as the + data in `feature_data`. + table: The FeatureView that `feature_data` was retrieved from. """ # Add the feature names to the response. requested_feature_refs = [ @@ -1973,7 +1973,7 @@ def _group_feature_refs( List[Tuple[RequestFeatureView, List[str]]], Set[str], ]: - """ Get list of feature views and corresponding feature names based on feature references""" + """Get list of feature views and corresponding feature names based on feature references""" # view name to view proto view_index = {view.projection.name_to_use(): view for view in all_feature_views} @@ -2046,7 +2046,7 @@ def _print_materialization_log( def _validate_feature_views(feature_views: List[BaseFeatureView]): - """ Verify feature views have case-insensitively unique names""" + """Verify feature views have case-insensitively unique names""" fv_names = set() for fv in feature_views: case_insensitive_fv_name = fv.name.lower() @@ -2061,7 +2061,7 @@ def _validate_feature_views(feature_views: List[BaseFeatureView]): def _validate_data_sources(data_sources: List[DataSource]): - """ Verify data sources have case-insensitively unique names""" + """Verify data sources have case-insensitively unique names""" ds_names = set() for fv in data_sources: case_insensitive_ds_name = fv.name.lower() diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 44e62d6ad1a..6c0d56562ca 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -56,7 +56,7 @@ class BigQueryOfflineStoreConfig(FeastConfigBaseModel): - """ Offline store config for GCP BigQuery """ + """Offline store config for GCP BigQuery""" type: Literal["bigquery"] = "bigquery" """ Offline store type selector""" diff --git a/sdk/python/feast/infra/offline_stores/bigquery_source.py b/sdk/python/feast/infra/offline_stores/bigquery_source.py index 92b6939fc3a..24593581c7c 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery_source.py +++ b/sdk/python/feast/infra/offline_stores/bigquery_source.py @@ -27,20 +27,20 @@ def __init__( ): """Create a BigQuerySource from an existing table or query. - Args: - table (optional): The BigQuery table where features can be found. - table_ref (optional): (Deprecated) The BigQuery table where features can be found. - event_timestamp_column: Event timestamp column used for point in time joins of feature values. - created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows. - field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table - or view. Only used for feature columns, not entities or timestamp columns. - date_partition_column (optional): Timestamp column used for partitioning. - query (optional): SQL query to execute to generate data for this data source. - name (optional): Name for the source. Defaults to the table_ref if not specified. - Example: - >>> from feast import BigQuerySource - >>> my_bigquery_source = BigQuerySource(table="gcp_project:bq_dataset.bq_table") - """ + Args: + table (optional): The BigQuery table where features can be found. + table_ref (optional): (Deprecated) The BigQuery table where features can be found. + event_timestamp_column: Event timestamp column used for point in time joins of feature values. + created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows. + field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table + or view. Only used for feature columns, not entities or timestamp columns. + date_partition_column (optional): Timestamp column used for partitioning. + query (optional): SQL query to execute to generate data for this data source. + name (optional): Name for the source. Defaults to the table_ref if not specified. + Example: + >>> from feast import BigQuerySource + >>> my_bigquery_source = BigQuerySource(table="gcp_project:bq_dataset.bq_table") + """ if table is None and table_ref is None and query is None: raise ValueError('No "table" or "query" argument provided.') if not table and table_ref: diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index c71f0c3ff74..b39e8f5c2de 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -31,7 +31,7 @@ class FileOfflineStoreConfig(FeastConfigBaseModel): - """ Offline store config for local (file-based) store """ + """Offline store config for local (file-based) store""" type: Literal["file"] = "file" """ Offline store type selector""" diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 3efd45bc741..e67cf13f5c4 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -39,7 +39,7 @@ class RedshiftOfflineStoreConfig(FeastConfigBaseModel): - """ Offline store config for AWS Redshift """ + """Offline store config for AWS Redshift""" type: Literal["redshift"] = "redshift" """ Offline store type selector""" @@ -341,7 +341,7 @@ def _to_arrow_internal(self) -> pa.Table: @log_exceptions_and_usage def to_s3(self) -> str: - """ Export dataset to S3 in Parquet format and return path """ + """Export dataset to S3 in Parquet format and return path""" if self.on_demand_feature_views: transformed_df = self.to_df() aws_utils.upload_df_to_s3(self._s3_resource, self._s3_path, transformed_df) @@ -361,7 +361,7 @@ def to_s3(self) -> str: @log_exceptions_and_usage def to_redshift(self, table_name: str) -> None: - """ Save dataset as a new Redshift table """ + """Save dataset as a new Redshift table""" if self.on_demand_feature_views: transformed_df = self.to_df() aws_utils.upload_df_to_redshift( diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index ee8cd71ce05..cc346251a82 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -54,7 +54,7 @@ class SnowflakeOfflineStoreConfig(FeastConfigBaseModel): - """ Offline store config for Snowflake """ + """Offline store config for Snowflake""" type: Literal["snowflake.offline"] = "snowflake.offline" """ Offline store type selector""" @@ -336,7 +336,7 @@ def _to_arrow_internal(self) -> pa.Table: ) def to_snowflake(self, table_name: str) -> None: - """ Save dataset as a new Snowflake table """ + """Save dataset as a new Snowflake table""" if self.on_demand_feature_views is not None: transformed_df = self.to_df() diff --git a/sdk/python/feast/infra/online_stores/datastore.py b/sdk/python/feast/infra/online_stores/datastore.py index e7621ab88f8..e975ce138ca 100644 --- a/sdk/python/feast/infra/online_stores/datastore.py +++ b/sdk/python/feast/infra/online_stores/datastore.py @@ -55,7 +55,7 @@ class DatastoreOnlineStoreConfig(FeastConfigBaseModel): - """ Online store config for GCP Datastore """ + """Online store config for GCP Datastore""" type: Literal["datastore"] = "datastore" """ Online store type selector""" diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index e65aab4e7be..710f4c386a6 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -37,7 +37,7 @@ class SqliteOnlineStoreConfig(FeastConfigBaseModel): - """ Online store config for local (SQLite-based) store """ + """Online store config for local (SQLite-based) store""" type: Literal[ "sqlite", "feast.infra.online_stores.sqlite.SqliteOnlineStore" diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index b3f10292423..4441b77c644 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -183,7 +183,7 @@ def retrieve_saved_dataset( Returns: RetrievalJob object, which is lazy wrapper for actual query performed under the hood. - """ + """ ... def get_feature_server_endpoint(self) -> Optional[str]: diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index e7f628795d8..fe5eed774ec 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -151,7 +151,7 @@ def execute_redshift_statement( def get_redshift_statement_result(redshift_data_client, statement_id: str) -> dict: - """ Get the Redshift statement result """ + """Get the Redshift statement result""" return redshift_data_client.get_statement_result(Id=statement_id) @@ -306,7 +306,7 @@ def temporarily_upload_df_to_redshift( def download_s3_directory(s3_resource, bucket: str, key: str, local_dir: str): - """ Download the S3 directory to a local disk """ + """Download the S3 directory to a local disk""" bucket_obj = s3_resource.Bucket(bucket) if key != "" and not key.endswith("/"): key = key + "/" @@ -318,7 +318,7 @@ def download_s3_directory(s3_resource, bucket: str, key: str, local_dir: str): def delete_s3_directory(s3_resource, bucket: str, key: str): - """ Delete S3 directory recursively """ + """Delete S3 directory recursively""" bucket_obj = s3_resource.Bucket(bucket) if key != "" and not key.endswith("/"): key = key + "/" @@ -365,7 +365,7 @@ def unload_redshift_query_to_pa( iam_role: str, query: str, ) -> pa.Table: - """ Unload Redshift Query results to S3 and get the results in PyArrow Table format """ + """Unload Redshift Query results to S3 and get the results in PyArrow Table format""" bucket, key = get_bucket_and_key(s3_path) execute_redshift_query_and_unload_to_s3( @@ -388,7 +388,7 @@ def unload_redshift_query_to_df( iam_role: str, query: str, ) -> pd.DataFrame: - """ Unload Redshift Query results to S3 and get the results in Pandas DataFrame format """ + """Unload Redshift Query results to S3 and get the results in Pandas DataFrame format""" table = unload_redshift_query_to_pa( redshift_data_client, cluster_id, diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 4fc46b91faf..fce13d8f61e 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -57,7 +57,7 @@ class FeastBaseModel(BaseModel): - """ Feast Pydantic Configuration Class """ + """Feast Pydantic Configuration Class""" class Config: arbitrary_types_allowed = True @@ -65,7 +65,7 @@ class Config: class FeastConfigBaseModel(BaseModel): - """ Feast Pydantic Configuration Class """ + """Feast Pydantic Configuration Class""" class Config: arbitrary_types_allowed = True @@ -73,7 +73,7 @@ class Config: class RegistryConfig(FeastBaseModel): - """ Metadata Store Configuration. Configuration that relates to reading from and writing to the Feast registry.""" + """Metadata Store Configuration. Configuration that relates to reading from and writing to the Feast registry.""" registry_store_type: Optional[StrictStr] """ str: Provider name or a class name that implements RegistryStore. """ @@ -89,7 +89,7 @@ class RegistryConfig(FeastBaseModel): class RepoConfig(FeastBaseModel): - """ Repo config. Typically loaded from `feature_store.yaml` """ + """Repo config. Typically loaded from `feature_store.yaml`""" registry: Union[StrictStr, RegistryConfig] = "data/registry.db" """ str: Path to metadata store. Can be a local path, or remote object storage path, e.g. a GCS URI """ diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index 3457aa48866..5800e6c62b4 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -94,7 +94,7 @@ def get_repo_files(repo_root: Path) -> List[Path]: def parse_repo(repo_root: Path) -> RepoContents: - """ Collect feature table definitions from feature repo """ + """Collect feature table definitions from feature repo""" res = RepoContents( data_sources=set(), entities=set(), @@ -264,7 +264,7 @@ def teardown(repo_config: RepoConfig, repo_path: Path): @log_exceptions_and_usage def registry_dump(repo_config: RepoConfig, repo_path: Path): - """ For debugging only: output contents of the metadata registry """ + """For debugging only: output contents of the metadata registry""" registry_config = repo_config.get_registry_config() project = repo_config.project registry = Registry(registry_config=registry_config, repo_path=repo_path) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 713b952d092..9798faf508b 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -580,10 +580,10 @@ def pa_to_redshift_value_type(pa_type: pyarrow.DataType) -> str: def _non_empty_value(value: Any) -> bool: """ - Check that there's enough data we can use for type inference. - If primitive type - just checking that it's not None - If iterable - checking that there's some elements (len > 0) - String is special case: "" - empty string is considered non empty + Check that there's enough data we can use for type inference. + If primitive type - just checking that it's not None + If iterable - checking that there's some elements (len > 0) + String is special case: "" - empty string is considered non empty """ return value is not None and ( not isinstance(value, Sized) or len(value) > 0 or isinstance(value, str) diff --git a/sdk/python/feast/usage.py b/sdk/python/feast/usage.py index 6a6a7146ce7..90b659479d1 100644 --- a/sdk/python/feast/usage.py +++ b/sdk/python/feast/usage.py @@ -224,27 +224,27 @@ def tracing_span(name): def log_exceptions_and_usage(*args, **attrs): """ - This function decorator enables three components: - 1. Error tracking - 2. Usage statistic collection - 3. Time profiling - - This data is being collected, anonymized and sent to Feast Developers. - All events from nested decorated functions are being grouped into single event - to build comprehensive context useful for profiling and error tracking. - - Usage example (will result in one output event): - @log_exceptions_and_usage - def fn(...): - nested() - - @log_exceptions_and_usage(attr='value') - def nested(...): - deeply_nested() - - @log_exceptions_and_usage(attr2='value2', sample=RateSampler(rate=0.1)) - def deeply_nested(...): - ... + This function decorator enables three components: + 1. Error tracking + 2. Usage statistic collection + 3. Time profiling + + This data is being collected, anonymized and sent to Feast Developers. + All events from nested decorated functions are being grouped into single event + to build comprehensive context useful for profiling and error tracking. + + Usage example (will result in one output event): + @log_exceptions_and_usage + def fn(...): + nested() + + @log_exceptions_and_usage(attr='value') + def nested(...): + deeply_nested() + + @log_exceptions_and_usage(attr2='value2', sample=RateSampler(rate=0.1)) + def deeply_nested(...): + ... """ sampler = attrs.pop("sampler", AlwaysSampler()) diff --git a/sdk/python/feast/utils.py b/sdk/python/feast/utils.py index 890c48fcbe2..e521338680c 100644 --- a/sdk/python/feast/utils.py +++ b/sdk/python/feast/utils.py @@ -4,7 +4,7 @@ def make_tzaware(t: datetime) -> datetime: - """ We assume tz-naive datetimes are UTC """ + """We assume tz-naive datetimes are UTC""" if t.tzinfo is None: return t.replace(tzinfo=utc) else: diff --git a/sdk/python/tests/integration/e2e/test_usage_e2e.py b/sdk/python/tests/integration/e2e/test_usage_e2e.py index c7b62b3a5da..12c1eb86281 100644 --- a/sdk/python/tests/integration/e2e/test_usage_e2e.py +++ b/sdk/python/tests/integration/e2e/test_usage_e2e.py @@ -136,7 +136,7 @@ def test_exception_usage_off(dummy_exporter, enabling_toggle): def _reload_feast(): - """ After changing environment need to reload modules and rerun usage decorators """ + """After changing environment need to reload modules and rerun usage decorators""" modules = ( "feast.infra.local", "feast.infra.online_stores.sqlite", diff --git a/sdk/python/tests/integration/online_store/test_e2e_local.py b/sdk/python/tests/integration/online_store/test_e2e_local.py index d14bc5ab1cc..c1aa10900ae 100644 --- a/sdk/python/tests/integration/online_store/test_e2e_local.py +++ b/sdk/python/tests/integration/online_store/test_e2e_local.py @@ -12,7 +12,7 @@ def _get_last_feature_row(df: pd.DataFrame, driver_id, max_date: datetime): - """ Manually extract last feature value from a dataframe for a given driver_id with up to `max_date` date """ + """Manually extract last feature value from a dataframe for a given driver_id with up to `max_date` date""" filtered = df[ (df["driver_id"] == driver_id) & (df["event_timestamp"] < max_date.replace(tzinfo=utc)) diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index d5496a6de75..c7345d3f4d6 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -485,7 +485,7 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source): def test_apply_conflicting_featureview_names(feature_store_with_local_registry): - """ Test applying feature views with non-case-insensitively unique names""" + """Test applying feature views with non-case-insensitively unique names""" driver_stats = FeatureView( name="driver_hourly_stats", diff --git a/sdk/python/tests/utils/online_read_write_test.py b/sdk/python/tests/utils/online_read_write_test.py index fe03217dabe..39846cd2ad4 100644 --- a/sdk/python/tests/utils/online_read_write_test.py +++ b/sdk/python/tests/utils/online_read_write_test.py @@ -22,7 +22,7 @@ def basic_rw_test( ) def _driver_rw_test(event_ts, created_ts, write, expect_read): - """ A helper function to write values and read them back """ + """A helper function to write values and read them back""" write_lat, write_lon = write expect_lat, expect_lon = expect_read provider.online_write_batch( diff --git a/ui/src/parsers/types.ts b/ui/src/parsers/types.ts index e32d5b102ea..2f88eea4f06 100644 --- a/ui/src/parsers/types.ts +++ b/ui/src/parsers/types.ts @@ -13,7 +13,17 @@ enum FEAST_FEATURE_VALUE_TYPES { BYTES = "BYTES", INT32 = "INT32", DOUBLE = "DOUBLE", - UNIX_TIMESTAMP = "UNIX_TIMESTAMP" + UNIX_TIMESTAMP = "UNIX_TIMESTAMP", + INVALID = "INVALID", + BYTES_LIST = "BYTES_LIST", + STRING_LIST = "STRING_LIST", + INT32_LIST = "INT32_LIST", + INT64_LIST = "INT64_LIST", + DOUBLE_LIST = "DOUBLE_LIST", + FLOAT_LIST = "FLOAT_LIST", + BOOL_LIST = "BOOL_LIST", + UNIX_TIMESTAMP_LIST = "UNIX_TIMESTAMP_LIST", + NULL = "NULL" } export { FEAST_FCO_TYPES, FEAST_FEATURE_VALUE_TYPES };