From b349ad61d4285ee3f8f152714c52db66a80f680c Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Thu, 25 Aug 2022 11:57:24 -0400 Subject: [PATCH 01/24] Initial implementation of BigTable online store. Signed-off-by: Abhin Chhabra --- .../contrib/bigtable_online_store/README.md | 1 + .../contrib/bigtable_online_store/__init__.py | 0 .../contrib/bigtable_online_store/bigtable.py | 212 ++++++++++++++++++ 3 files changed, 213 insertions(+) create mode 100644 sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/README.md create mode 100644 sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/__init__.py create mode 100644 sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/README.md b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/README.md new file mode 100644 index 00000000000..5b0e8da2d75 --- /dev/null +++ b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/README.md @@ -0,0 +1 @@ +# BigTable Online Store diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/__init__.py b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py new file mode 100644 index 00000000000..268baccfd86 --- /dev/null +++ b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py @@ -0,0 +1,212 @@ +import logging +from concurrent import futures +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple + +import google +from feast import Entity, FeatureView +from feast.infra.online_stores.online_store import OnlineStore +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as 
EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto, ValueType +from feast.repo_config import FeastConfigBaseModel, RepoConfig +from google.cloud import bigtable +from pydantic import StrictStr +from pydantic.typing import Literal +from feast.infra.online_stores.helpers import compute_entity_id +from feast.usage import get_user_agent, log_exceptions_and_usage, tracing_span + +logger = logging.getLogger(__name__) + +# Number of mutations per BigTable write operation we're aiming for. The official max is 100K; we're +# being conservative. +MUTATIONS_PER_OP = 50_000 +# The Bigtable client library limits the connection pool size to 10. This imposes a limitation to +# the concurrency we can get using a thread pool in each worker. +BIGTABLE_CLIENT_CONNECTION_POOL_SIZE = 10 + + +class BigTableOnlineStoreConfig(FeastConfigBaseModel): + type: Literal["bigtable"] = "bigtable" + + project: StrictStr + instance: StrictStr + max_versions: int = 2 + + +class BigTableOnlineStore(OnlineStore): + _client: Optional[bigtable.Client] = None + + @log_exceptions_and_usage(online_store="bigtable") + def online_read( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + feature_view = table + bt_table_name = self._get_table_name(config=config, feature_view=feature_view) + column_family_id = feature_view.name + + client = bigtable.Client(project=config.online_store.project) + bt_instance = client.instance(instance_id=config.online_store.instance) + bt_table = bt_instance.table(bt_table_name) + row_keys = [compute_entity_id(entity_key) for entity_key in entity_keys] + + batch_result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] + + # TODO: read all the rows in a single call instead of reading them sequentially + for row_key in row_keys: + res = {} + # TODO: use filters to reduce 
the amount of data transfered and skip unnecessary columns. + row = bt_table.read_row(row_key) + + if row is None: + continue + + for feature_name, feature_values in row.cells.get(column_family_id, {}).items(): + # We only want to retrieve the latest value for each feature + feature_value = feature_values[0] + val = ValueProto() + val.ParseFromString(feature_value.value) + res[feature_name.decode()] = val + + batch_result.append((feature_value.timestamp, res)) + + result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] + + # Pad in case not all entities in a batch have responses + batch_size_nones = ((None, None),) * (len(row_keys) - len(batch_result)) + batch_result.extend(batch_size_nones) + result.extend(batch_result) + return result + + @log_exceptions_and_usage(online_store="bigtable") + def online_write_batch( + self, + config: RepoConfig, + table: FeatureView, + data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]], + progress: Optional[Callable[[int], Any]], + ) -> None: + feature_view = table + bt_table_name = self._get_table_name(config=config, feature_view=feature_view) + column_family_id = feature_view.name + + client = bigtable.Client(project=config.online_store.project) + bt_instance = client.instance(instance_id=config.online_store.instance) + bt_table = bt_instance.table(bt_table_name) + + # `columns_per_row` is used to calculate the number of rows we are allowed to mutate in one + # request. Since `MUTATIONS_PER_OP` is set much lower than the max allowed value, the + # calculation of `columns_per_row` doesn't need to be precise. Feature views can have 1 or 2 + # timestamp fields: event timestamp and created timestamp. We assume 2 conservatively. 
+ columns_per_row = len(feature_view.features) + 2 # extra for 2 timestamps + rows_per_write = MUTATIONS_PER_OP // columns_per_row + + with futures.ThreadPoolExecutor(max_workers=BIGTABLE_CLIENT_CONNECTION_POOL_SIZE) as executor: + fs = [] + while data: + rows_to_write, data = data[:rows_per_write], data[rows_per_write:] + fs.append( + executor.submit( + self._write_rows_to_bt, + rows_to_write=rows_to_write, + bt_table=bt_table, + column_family_id=column_family_id, + ) + ) + futures.wait(fs) + + @staticmethod + def _write_rows_to_bt(rows_to_write, bt_table, column_family_id): + rows = [] + for row in rows_to_write: + entity_key, features, timestamp, created_ts = row + bt_row = bt_table.direct_row(compute_entity_id(entity_key)) + + for feature_name, feature_value in features.items(): + bt_row.set_cell(column_family_id, feature_name, feature_value.SerializeToString()) + # TODO: write timestamps during materialization as well + rows.append(bt_row) + bt_table.mutate_rows(rows) + + def update( + self, + config: RepoConfig, + tables_to_delete: Sequence[FeatureView], + tables_to_keep: Sequence[FeatureView], + entities_to_delete: Sequence[Entity], + entities_to_keep: Sequence[Entity], + partial: bool, + ): + """Creates the appropriate tables and column families in BigTable. + + We use a dedicated table for each entity combination. For example, if a FeatureView uses the entities `shop` and + `customer`, the resulting table would be called `customer-shop` (entities are sorted lexicographically first). + + FeatureViews are represented by column families in their respective tables. 
+ """ + online_config = config.online_store + assert isinstance(online_config, BigTableOnlineStoreConfig) + client = self._get_client(online_config, admin=True) + bt_instance = client.instance(instance_id=online_config.instance) + max_versions_gc_rule = bigtable.column_family.MaxVersionsGCRule(online_config.max_versions) + + for feature_view in tables_to_keep: + table_name = self._get_table_name(config=config, feature_view=feature_view) + table = bt_instance.table(table_name) + if not table.exists(): + logger.info(f"Creating table `{table_name}` in BigTable for feature view `{feature_view.name}`") + table.create() + else: + logger.info(f"Table {table_name} already exists in BigTable") + + cfs = table.list_column_families() + if feature_view.name not in cfs: + table.column_family(feature_view.name, gc_rule=max_versions_gc_rule).create() + + for feature_view in tables_to_delete: + table_name = self._get_table_name(config=config, feature_view=feature_view) + table = bt_instance.table(table_name) + cfs = table.list_column_families() + cf = cfs.pop(feature_view.name, None) + if cf is not None: + cf.delete() + else: + logger.warning( + f"Skipping deletion of column family `{feature_view.name}` in table `{table_name}` since it " + "doesn't exist. Perhaps it was deleted manually." + ) + if not cfs: + logger.info( + f"We've deleted all column families from the table `{table_name}`, so we're deleting it too." + ) + table.delete() + + @staticmethod + def _get_table_name(config: RepoConfig, feature_view: FeatureView) -> str: + return f"{config.project}.{'-'.join(sorted(feature_view.entities))}" + + def teardown(self, config: RepoConfig, tables: Sequence[FeatureView], entities: Sequence[Entity]): + # Because of historical reasons, Feast calls them tables. We use this alias for readability. 
+ feature_views = tables + + bt_tables = {self._get_table_name(config=config, feature_view=fv) for fv in feature_views} + + online_config = config.online_store + assert isinstance(online_config, BigTableOnlineStoreConfig) + client = self._get_client(online_config, admin=True) + bt_instance = client.instance(instance_id=online_config.instance) + for table_name in bt_tables: + try: + logger.info(f"Deleting BigTable table `{table_name}`") + bt_instance.table(table_name).delete() + except google.api_core.exceptions.NotFound: + logger.warning(f"Table `{table_name}` was not found. Skipping deletion.") + pass + + def _get_client(self, online_config: BigTableOnlineStoreConfig, admin: bool = False): + if not self._client: + self._client = bigtable.Client(project=online_config.project, admin=admin) + return self._client From f9f45bb21ad34638154e5ca58a2e36867307e817 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Mon, 5 Sep 2022 12:15:12 -0400 Subject: [PATCH 02/24] Attempt to run bigtable integration tests. Currently focusing on just getting the tests running locally. I've only build python3.8 requirements. 
Signed-off-by: Abhin Chhabra --- .../contrib/bigtable_online_store/bigtable.py | 63 +++++++++---- sdk/python/feast/repo_config.py | 1 + .../requirements/py3.8-ci-requirements.txt | 89 ++++++++++--------- .../requirements/py3.8-requirements.txt | 38 ++++---- .../feature_repos/repo_configuration.py | 5 ++ .../universal/online_store/bigtable.py | 39 ++++++++ setup.py | 1 + 7 files changed, 159 insertions(+), 77 deletions(-) create mode 100644 sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py index 268baccfd86..4e039d71d1b 100644 --- a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py +++ b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py @@ -4,15 +4,17 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple import google -from feast import Entity, FeatureView -from feast.infra.online_stores.online_store import OnlineStore -from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto -from feast.protos.feast.types.Value_pb2 import Value as ValueProto, ValueType -from feast.repo_config import FeastConfigBaseModel, RepoConfig from google.cloud import bigtable from pydantic import StrictStr from pydantic.typing import Literal + +from feast import Entity, FeatureView from feast.infra.online_stores.helpers import compute_entity_id +from feast.infra.online_stores.online_store import OnlineStore +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.protos.feast.types.Value_pb2 import ValueType +from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.usage import get_user_agent, log_exceptions_and_usage, tracing_span logger = logging.getLogger(__name__) @@ -53,7 
+55,9 @@ def online_read( bt_table = bt_instance.table(bt_table_name) row_keys = [compute_entity_id(entity_key) for entity_key in entity_keys] - batch_result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] + batch_result: List[ + Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]] + ] = [] # TODO: read all the rows in a single call instead of reading them sequentially for row_key in row_keys: @@ -64,7 +68,9 @@ def online_read( if row is None: continue - for feature_name, feature_values in row.cells.get(column_family_id, {}).items(): + for feature_name, feature_values in row.cells.get( + column_family_id, {} + ).items(): # We only want to retrieve the latest value for each feature feature_value = feature_values[0] val = ValueProto() @@ -86,7 +92,9 @@ def online_write_batch( self, config: RepoConfig, table: FeatureView, - data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]], + data: List[ + Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]] + ], progress: Optional[Callable[[int], Any]], ) -> None: feature_view = table @@ -104,7 +112,9 @@ def online_write_batch( columns_per_row = len(feature_view.features) + 2 # extra for 2 timestamps rows_per_write = MUTATIONS_PER_OP // columns_per_row - with futures.ThreadPoolExecutor(max_workers=BIGTABLE_CLIENT_CONNECTION_POOL_SIZE) as executor: + with futures.ThreadPoolExecutor( + max_workers=BIGTABLE_CLIENT_CONNECTION_POOL_SIZE + ) as executor: fs = [] while data: rows_to_write, data = data[:rows_per_write], data[rows_per_write:] @@ -126,7 +136,9 @@ def _write_rows_to_bt(rows_to_write, bt_table, column_family_id): bt_row = bt_table.direct_row(compute_entity_id(entity_key)) for feature_name, feature_value in features.items(): - bt_row.set_cell(column_family_id, feature_name, feature_value.SerializeToString()) + bt_row.set_cell( + column_family_id, feature_name, feature_value.SerializeToString() + ) # TODO: write timestamps during 
materialization as well rows.append(bt_row) bt_table.mutate_rows(rows) @@ -151,20 +163,26 @@ def update( assert isinstance(online_config, BigTableOnlineStoreConfig) client = self._get_client(online_config, admin=True) bt_instance = client.instance(instance_id=online_config.instance) - max_versions_gc_rule = bigtable.column_family.MaxVersionsGCRule(online_config.max_versions) + max_versions_gc_rule = bigtable.column_family.MaxVersionsGCRule( + online_config.max_versions + ) for feature_view in tables_to_keep: table_name = self._get_table_name(config=config, feature_view=feature_view) table = bt_instance.table(table_name) if not table.exists(): - logger.info(f"Creating table `{table_name}` in BigTable for feature view `{feature_view.name}`") + logger.info( + f"Creating table `{table_name}` in BigTable for feature view `{feature_view.name}`" + ) table.create() else: logger.info(f"Table {table_name} already exists in BigTable") cfs = table.list_column_families() if feature_view.name not in cfs: - table.column_family(feature_view.name, gc_rule=max_versions_gc_rule).create() + table.column_family( + feature_view.name, gc_rule=max_versions_gc_rule + ).create() for feature_view in tables_to_delete: table_name = self._get_table_name(config=config, feature_view=feature_view) @@ -188,11 +206,18 @@ def update( def _get_table_name(config: RepoConfig, feature_view: FeatureView) -> str: return f"{config.project}.{'-'.join(sorted(feature_view.entities))}" - def teardown(self, config: RepoConfig, tables: Sequence[FeatureView], entities: Sequence[Entity]): + def teardown( + self, + config: RepoConfig, + tables: Sequence[FeatureView], + entities: Sequence[Entity], + ): # Because of historical reasons, Feast calls them tables. We use this alias for readability. 
feature_views = tables - bt_tables = {self._get_table_name(config=config, feature_view=fv) for fv in feature_views} + bt_tables = { + self._get_table_name(config=config, feature_view=fv) for fv in feature_views + } online_config = config.online_store assert isinstance(online_config, BigTableOnlineStoreConfig) @@ -203,10 +228,14 @@ def teardown(self, config: RepoConfig, tables: Sequence[FeatureView], entities: logger.info(f"Deleting BigTable table `{table_name}`") bt_instance.table(table_name).delete() except google.api_core.exceptions.NotFound: - logger.warning(f"Table `{table_name}` was not found. Skipping deletion.") + logger.warning( + f"Table `{table_name}` was not found. Skipping deletion." + ) pass - def _get_client(self, online_config: BigTableOnlineStoreConfig, admin: bool = False): + def _get_client( + self, online_config: BigTableOnlineStoreConfig, admin: bool = False + ): if not self._client: self._client = bigtable.Client(project=online_config.project, admin=admin) return self._client diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index c11b25849e3..a3e2090e93b 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -52,6 +52,7 @@ "hbase": "feast.infra.online_stores.contrib.hbase_online_store.hbase.HbaseOnlineStore", "cassandra": "feast.infra.online_stores.contrib.cassandra_online_store.cassandra_online_store.CassandraOnlineStore", "mysql": "feast.infra.online_stores.contrib.mysql_online_store.mysql.MySQLOnlineStore", + "bigtable": "feast.infra.online_stores.contrib.bigtable_online_store.bigtable.BigTableOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 84a8c7408e1..dca0ba225ac 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -56,7 +56,7 @@ attrs==22.1.0 # pytest avro==1.10.0 # via feast (setup.py) 
-azure-core==1.25.0 +azure-core==1.25.1 # via # adlfs # azure-identity @@ -80,7 +80,7 @@ backports-zoneinfo==0.2.1 # via # pytz-deprecation-shim # tzlocal -black==22.6.0 +black==22.8.0 # via feast (setup.py) boto3==1.20.23 # via @@ -173,7 +173,7 @@ dill==0.3.5.1 # via # feast (setup.py) # multiprocess -distlib==0.3.5 +distlib==0.3.6 # via virtualenv docker==6.0.0 # via @@ -187,9 +187,9 @@ entrypoints==0.4 # via altair execnet==1.9.0 # via pytest-xdist -executing==0.10.0 +executing==1.0.0 # via stack-data -fastapi==0.79.1 +fastapi==0.82.0 # via feast (setup.py) fastavro==1.6.0 # via @@ -199,7 +199,7 @@ fastjsonschema==2.16.1 # via nbformat filelock==3.8.0 # via virtualenv -firebase-admin==5.2.0 +firebase-admin==5.3.0 # via feast (setup.py) fissix==21.11.13 # via bowler @@ -219,20 +219,21 @@ gcsfs==2022.1.0 # via feast (setup.py) geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.8.2 +google-api-core[grpc]==2.10.0 # via # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-core # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.57.0 +google-api-python-client==2.58.0 # via firebase-admin -google-auth==2.10.0 +google-auth==2.11.0 # via # gcsfs # google-api-core @@ -252,9 +253,12 @@ google-cloud-bigquery-storage==2.14.2 # via # feast (setup.py) # google-cloud-bigquery +google-cloud-bigtable==2.11.3 + # via feast (setup.py) google-cloud-core==2.3.2 # via # google-cloud-bigquery + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # google-cloud-storage @@ -267,38 +271,41 @@ google-cloud-storage==2.5.0 # feast (setup.py) # firebase-admin # gcsfs -google-crc32c==1.3.0 +google-crc32c==1.5.0 # via google-resumable-media google-resumable-media==2.3.3 # via # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos==1.56.4 +googleapis-common-protos[grpc]==1.56.4 # via 
# feast (setup.py) # google-api-core + # grpc-google-iam-v1 # grpcio-status # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) -greenlet==1.1.2 - # via sqlalchemy -grpcio==1.47.0 +grpc-google-iam-v1==0.12.4 + # via google-cloud-bigtable +grpcio==1.48.1 # via # feast (setup.py) # google-api-core # google-cloud-bigquery + # googleapis-common-protos + # grpc-google-iam-v1 # grpcio-reflection # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.47.0 +grpcio-reflection==1.48.1 # via feast (setup.py) -grpcio-status==1.47.0 +grpcio-status==1.48.1 # via google-api-core -grpcio-testing==1.47.0 +grpcio-testing==1.48.1 # via feast (setup.py) -grpcio-tools==1.47.0 +grpcio-tools==1.48.1 # via feast (setup.py) h11==0.13.0 # via uvicorn @@ -351,7 +358,7 @@ jsonpatch==1.32 # via great-expectations jsonpointer==2.3 # via jsonpatch -jsonschema==4.13.0 +jsonschema==4.15.0 # via # altair # feast (setup.py) @@ -381,7 +388,7 @@ mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==3.1.18 +moto==4.0.2 # via feast (setup.py) msal==1.18.0 # via @@ -445,7 +452,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.4.3 +pandas==1.4.4 # via # altair # db-dtypes @@ -460,7 +467,7 @@ parso==0.8.3 # via jedi partd==1.3.0 # via dask -pathspec==0.9.0 +pathspec==0.10.1 # via black pbr==5.10.0 # via mock @@ -486,13 +493,14 @@ portalocker==2.5.1 # via msal-extensions pre-commit==2.20.0 # via feast (setup.py) -prompt-toolkit==3.0.30 +prompt-toolkit==3.0.31 # via ipython -proto-plus==1.22.0 +proto-plus==1.22.1 # via # feast (setup.py) # google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore protobuf==3.20.2 @@ -501,6 +509,7 @@ protobuf==3.20.2 # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # googleapis-common-protos @@ -547,7 +556,7 @@ pycparser==2.21 # via cffi 
pycryptodomex==3.15.0 # via snowflake-connector-python -pydantic==1.9.2 +pydantic==1.10.1 # via # fastapi # feast (setup.py) @@ -580,7 +589,7 @@ pyrsistent==0.18.1 # via jsonschema pyspark==3.3.0 # via feast (setup.py) -pytest==7.1.2 +pytest==7.1.3 # via # feast (setup.py) # pytest-benchmark @@ -616,7 +625,7 @@ python-dateutil==2.8.2 # kubernetes # moto # pandas -python-dotenv==0.20.0 +python-dotenv==0.21.0 # via uvicorn pytz==2022.2.1 # via @@ -676,7 +685,7 @@ s3fs==2022.1.0 # via feast (setup.py) s3transfer==0.5.2 # via boto3 -scipy==1.9.0 +scipy==1.9.1 # via great-expectations six==1.16.0 # via @@ -694,7 +703,7 @@ six==1.16.0 # msrestazure # pandavro # python-dateutil -sniffio==1.2.0 +sniffio==1.3.0 # via anyio snowballstemmer==2.2.0 # via sphinx @@ -720,9 +729,9 @@ sphinxcontrib-serializinghtml==1.1.5 # via sphinx sqlalchemy[mypy]==1.4.40 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a25 +sqlalchemy2-stubs==0.0.2a27 # via sqlalchemy -stack-data==0.4.0 +stack-data==0.5.0 # via ipython starlette==0.19.1 # via fastapi @@ -730,7 +739,7 @@ tabulate==0.8.10 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.9.0 +tensorflow-metadata==1.10.0 # via feast (setup.py) termcolor==1.1.0 # via great-expectations @@ -755,7 +764,7 @@ toolz==0.12.0 # altair # dask # partd -tqdm==4.64.0 +tqdm==4.64.1 # via # feast (setup.py) # great-expectations @@ -769,7 +778,7 @@ trino==0.315.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-protobuf==3.19.22 +types-protobuf==3.20.1 # via # feast (setup.py) # mypy-protobuf @@ -781,11 +790,11 @@ types-pytz==2022.2.1.0 # via feast (setup.py) types-pyyaml==6.0.11 # via feast (setup.py) -types-redis==4.3.14 +types-redis==4.3.20 # via feast (setup.py) types-requests==2.28.9 # via feast (setup.py) -types-setuptools==65.1.0 +types-setuptools==65.3.0 # via feast (setup.py) types-tabulate==0.8.11 # via feast (setup.py) @@ -807,7 +816,7 @@ tzlocal==4.2 # via great-expectations uritemplate==4.1.1 
# via google-api-python-client -urllib3==1.26.11 +urllib3==1.26.12 # via # botocore # docker @@ -817,11 +826,11 @@ urllib3==1.26.11 # minio # requests # responses -uvicorn[standard]==0.18.2 +uvicorn[standard]==0.18.3 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.16.3 +virtualenv==20.16.4 # via pre-commit volatile==2.1.0 # via bowler @@ -829,7 +838,7 @@ watchfiles==0.16.1 # via uvicorn wcwidth==0.2.5 # via prompt-toolkit -websocket-client==1.3.3 +websocket-client==1.4.1 # via # docker # kubernetes diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index b992ad3fc0a..3410af7585f 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -22,7 +22,7 @@ cachetools==5.2.0 # via google-auth certifi==2022.6.15 # via requests -charset-normalizer==2.1.0 +charset-normalizer==2.1.1 # via requests click==8.1.3 # via @@ -38,7 +38,7 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.5.1 # via feast (setup.py) -fastapi==0.79.1 +fastapi==0.82.0 # via feast (setup.py) fastavro==1.6.0 # via @@ -46,24 +46,22 @@ fastavro==1.6.0 # pandavro fissix==21.11.13 # via bowler -fsspec==2022.7.1 +fsspec==2022.8.2 # via dask -google-api-core==2.8.2 +google-api-core==2.10.0 # via feast (setup.py) -google-auth==2.10.0 +google-auth==2.11.0 # via google-api-core googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core # tensorflow-metadata -greenlet==1.1.2 - # via sqlalchemy -grpcio==1.47.0 +grpcio==1.48.1 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.47.0 +grpcio-reflection==1.48.1 # via feast (setup.py) h11==0.13.0 # via uvicorn @@ -77,7 +75,7 @@ importlib-resources==5.9.0 # via jsonschema jinja2==3.1.2 # via feast (setup.py) -jsonschema==4.13.0 +jsonschema==4.15.0 # via feast (setup.py) locket==1.0.0 # via partd @@ -99,7 +97,7 @@ numpy==1.23.2 # pyarrow packaging==21.3 # via dask -pandas==1.4.3 +pandas==1.4.4 # via # feast 
(setup.py) # pandavro @@ -109,7 +107,7 @@ partd==1.3.0 # via dask pkgutil-resolve-name==1.3.10 # via jsonschema -proto-plus==1.22.0 +proto-plus==1.22.1 # via feast (setup.py) protobuf==3.20.2 # via @@ -127,7 +125,7 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.9.2 +pydantic==1.10.1 # via # fastapi # feast (setup.py) @@ -139,7 +137,7 @@ pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.20.0 +python-dotenv==0.21.0 # via uvicorn pytz==2022.2.1 # via pandas @@ -158,11 +156,11 @@ six==1.16.0 # grpcio # pandavro # python-dateutil -sniffio==1.2.0 +sniffio==1.3.0 # via anyio sqlalchemy[mypy]==1.4.40 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a25 +sqlalchemy2-stubs==0.0.2a27 # via sqlalchemy starlette==0.19.1 # via fastapi @@ -170,7 +168,7 @@ tabulate==0.8.10 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.9.0 +tensorflow-metadata==1.10.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -180,7 +178,7 @@ toolz==0.12.0 # via # dask # partd -tqdm==4.64.0 +tqdm==4.64.1 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) @@ -190,9 +188,9 @@ typing-extensions==4.3.0 # pydantic # sqlalchemy2-stubs # starlette -urllib3==1.26.11 +urllib3==1.26.12 # via requests -uvicorn[standard]==0.18.2 +uvicorn[standard]==0.18.3 # via feast (setup.py) uvloop==0.16.0 # via uvicorn diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 708d9c0a142..b116a3605bf 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -51,6 +51,9 @@ create_order_feature_view, create_pushable_feature_view, ) +from tests.integration.feature_repos.universal.online_store.bigtable import ( + BigTableOnlineStoreCreator, +) from tests.integration.feature_repos.universal.online_store.datastore import ( 
DatastoreOnlineStoreCreator, ) @@ -115,6 +118,7 @@ AVAILABLE_ONLINE_STORES["dynamodb"] = (DYNAMO_CONFIG, None) AVAILABLE_ONLINE_STORES["datastore"] = ("datastore", None) AVAILABLE_ONLINE_STORES["snowflake"] = (SNOWFLAKE_CONFIG, None) + AVAILABLE_ONLINE_STORES["bigtable"] = ("bigtable", None) full_repo_configs_module = os.environ.get(FULL_REPO_CONFIGS_MODULE_ENV_NAME) @@ -161,6 +165,7 @@ "redis": (REDIS_CONFIG, RedisOnlineStoreCreator), "dynamodb": (DYNAMO_CONFIG, DynamoDBOnlineStoreCreator), "datastore": ("datastore", DatastoreOnlineStoreCreator), + "bigtable": ("bigtable", BigTableOnlineStoreCreator), } for key, replacement in replacements.items(): diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py b/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py new file mode 100644 index 00000000000..2592912be50 --- /dev/null +++ b/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py @@ -0,0 +1,39 @@ +import os +from typing import Dict + +from google.cloud import bigtable +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for_logs + +from tests.integration.feature_repos.universal.online_store_creator import ( + OnlineStoreCreator, +) + + +# TODO: deduplicate code between this and the datastore creator +class BigTableOnlineStoreCreator(OnlineStoreCreator): + def __init__(self, project_name: str, **kwargs): + super().__init__(project_name) + self.container = ( + DockerContainer( + "gcr.io/google.com/cloudsdktool/cloud-sdk:380.0.0-emulators" + ) + .with_command( + "gcloud beta emulators bigtable start --project test-project --host-port 0.0.0.0:8082" + ) + .with_exposed_ports("8082") + ) + + def create_online_store(self) -> Dict[str, str]: + self.container.start() + log_string_to_wait_for = r"\[bigtable\] Dev App Server is now running" + wait_for_logs( + container=self.container, predicate=log_string_to_wait_for, timeout=10 + ) + 
exposed_port = self.container.get_exposed_port("8082") + os.environ[bigtable.client.BIGTABLE_EMULATOR] = f"0.0.0.0:{exposed_port}" + return {"type": "bigtable", "project_id": "test-project"} + + def teardown(self): + del os.environ[bigtable.client.BIGTABLE_EMULATOR] + self.container.stop() diff --git a/setup.py b/setup.py index 59c362ff9fc..ab7bb8b6e50 100644 --- a/setup.py +++ b/setup.py @@ -84,6 +84,7 @@ "google-cloud-bigquery-storage >= 2.0.0,<3", "google-cloud-datastore>=2.1.*,<3", "google-cloud-storage>=1.34.*,<3", + "google-cloud-bigtable>=2.11.*,<3", ] REDIS_REQUIRED = [ From 2a1a86396b15f6f29715861c55444f6c72c94fc1 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Mon, 5 Sep 2022 16:55:00 -0400 Subject: [PATCH 03/24] Got the BigTable tests running in local containers Signed-off-by: Abhin Chhabra --- .../contrib/bigtable_online_store/bigtable.py | 18 +++++++++++---- .../universal/online_store/bigtable.py | 22 +++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py index 4e039d71d1b..bfb64a85762 100644 --- a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py +++ b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py @@ -28,11 +28,19 @@ class BigTableOnlineStoreConfig(FeastConfigBaseModel): + """Online store config for GCP BigTable""" + type: Literal["bigtable"] = "bigtable" + """Online store typee selector""" + + project_id: Optional[StrictStr] = None + """(optional) GCP Project ID""" - project: StrictStr instance: StrictStr + """The BigTable instance's ID""" + max_versions: int = 2 + """The number of historical versions of data that will be kept around.""" class BigTableOnlineStore(OnlineStore): @@ -50,7 +58,7 @@ def online_read( bt_table_name = self._get_table_name(config=config, feature_view=feature_view) 
column_family_id = feature_view.name - client = bigtable.Client(project=config.online_store.project) + client = self._get_client(online_config=config.online_store) bt_instance = client.instance(instance_id=config.online_store.instance) bt_table = bt_instance.table(bt_table_name) row_keys = [compute_entity_id(entity_key) for entity_key in entity_keys] @@ -101,7 +109,7 @@ def online_write_batch( bt_table_name = self._get_table_name(config=config, feature_view=feature_view) column_family_id = feature_view.name - client = bigtable.Client(project=config.online_store.project) + client = self._get_client(online_config=config.online_store) bt_instance = client.instance(instance_id=config.online_store.instance) bt_table = bt_instance.table(bt_table_name) @@ -237,5 +245,7 @@ def _get_client( self, online_config: BigTableOnlineStoreConfig, admin: bool = False ): if not self._client: - self._client = bigtable.Client(project=online_config.project, admin=admin) + self._client = bigtable.Client( + project=online_config.project_id, admin=admin + ) return self._client diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py b/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py index 2592912be50..c40457a8111 100644 --- a/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py +++ b/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py @@ -10,8 +10,12 @@ ) -# TODO: deduplicate code between this and the datastore creator class BigTableOnlineStoreCreator(OnlineStoreCreator): + gcp_project = "test-project" + host = "0.0.0.0" + port = "8086" + bt_instance = "test-instance" + def __init__(self, project_name: str, **kwargs): super().__init__(project_name) self.container = ( @@ -19,20 +23,24 @@ def __init__(self, project_name: str, **kwargs): "gcr.io/google.com/cloudsdktool/cloud-sdk:380.0.0-emulators" ) .with_command( - "gcloud beta emulators bigtable start --project test-project 
--host-port 0.0.0.0:8082" + f"gcloud beta emulators bigtable start --project {self.gcp_project} --host-port {self.host}:{self.port}" ) - .with_exposed_ports("8082") + .with_exposed_ports(self.port) ) def create_online_store(self) -> Dict[str, str]: self.container.start() - log_string_to_wait_for = r"\[bigtable\] Dev App Server is now running" + log_string_to_wait_for = r"\[bigtable\] Cloud Bigtable emulator running" wait_for_logs( container=self.container, predicate=log_string_to_wait_for, timeout=10 ) - exposed_port = self.container.get_exposed_port("8082") - os.environ[bigtable.client.BIGTABLE_EMULATOR] = f"0.0.0.0:{exposed_port}" - return {"type": "bigtable", "project_id": "test-project"} + exposed_port = self.container.get_exposed_port(self.port) + os.environ[bigtable.client.BIGTABLE_EMULATOR] = f"{self.host}:{exposed_port}" + return { + "type": "bigtable", + "project_id": self.gcp_project, + "instance": self.bt_instance, + } def teardown(self): del os.environ[bigtable.client.BIGTABLE_EMULATOR] From 96814d67bef875ac0939d7ea44997df406628c7b Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Mon, 5 Sep 2022 21:04:13 -0400 Subject: [PATCH 04/24] Set serialization version when computing entity ID Signed-off-by: Abhin Chhabra --- .../contrib/bigtable_online_store/bigtable.py | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py index bfb64a85762..511cc47e8f4 100644 --- a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py +++ b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py @@ -61,7 +61,13 @@ def online_read( client = self._get_client(online_config=config.online_store) bt_instance = client.instance(instance_id=config.online_store.instance) bt_table = bt_instance.table(bt_table_name) - row_keys = 
[compute_entity_id(entity_key) for entity_key in entity_keys] + row_keys = [ + compute_entity_id( + entity_key, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + for entity_key in entity_keys + ] batch_result: List[ Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]] @@ -132,16 +138,29 @@ def online_write_batch( rows_to_write=rows_to_write, bt_table=bt_table, column_family_id=column_family_id, + config=config, ) ) futures.wait(fs) @staticmethod - def _write_rows_to_bt(rows_to_write, bt_table, column_family_id): + def _write_rows_to_bt( + rows_to_write: List[ + Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]] + ], + bt_table: bigtable.table.Table, + column_family_id: str, + config: RepoConfig, + ): rows = [] for row in rows_to_write: entity_key, features, timestamp, created_ts = row - bt_row = bt_table.direct_row(compute_entity_id(entity_key)) + bt_row = bt_table.direct_row( + compute_entity_id( + entity_key, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + ) for feature_name, feature_value in features.items(): bt_row.set_cell( @@ -239,7 +258,6 @@ def teardown( logger.warning( f"Table `{table_name}` was not found. Skipping deletion." ) - pass def _get_client( self, online_config: BigTableOnlineStoreConfig, admin: bool = False From 6e6233f69cf22341790bb275935b0db28c5def6f Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Mon, 5 Sep 2022 22:34:51 -0400 Subject: [PATCH 05/24] Switch to the recommended layout in bigtable. This was recommended by the BigTable dev team. Details of this layout will be added to the documentation in a future commit. 
Signed-off-by: Abhin Chhabra --- .../contrib/bigtable_online_store/bigtable.py | 184 ++++++++++-------- 1 file changed, 108 insertions(+), 76 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py index 511cc47e8f4..5d346e77e97 100644 --- a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py +++ b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py @@ -1,14 +1,14 @@ import logging from concurrent import futures from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple import google from google.cloud import bigtable from pydantic import StrictStr from pydantic.typing import Literal -from feast import Entity, FeatureView +from feast import Entity, FeatureView, feature_view, utils from feast.infra.online_stores.helpers import compute_entity_id from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto @@ -19,11 +19,11 @@ logger = logging.getLogger(__name__) -# Number of mutations per BigTable write operation we're aiming for. The official max is 100K; we're -# being conservative. +# Number of mutations per BigTable write operation we're aiming for. The official max is +# 100K; we're being conservative. MUTATIONS_PER_OP = 50_000 -# The Bigtable client library limits the connection pool size to 10. This imposes a limitation to -# the concurrency we can get using a thread pool in each worker. +# The Bigtable client library limits the connection pool size to 10. This imposes a +# limitation to the concurrency we can get using a thread pool in each worker. 
BIGTABLE_CLIENT_CONNECTION_POOL_SIZE = 10 @@ -46,6 +46,8 @@ class BigTableOnlineStoreConfig(FeastConfigBaseModel): class BigTableOnlineStore(OnlineStore): _client: Optional[bigtable.Client] = None + feature_column_family: str = "features" + @log_exceptions_and_usage(online_store="bigtable") def online_read( self, @@ -56,15 +58,15 @@ def online_read( ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: feature_view = table bt_table_name = self._get_table_name(config=config, feature_view=feature_view) - column_family_id = feature_view.name client = self._get_client(online_config=config.online_store) bt_instance = client.instance(instance_id=config.online_store.instance) bt_table = bt_instance.table(bt_table_name) row_keys = [ - compute_entity_id( - entity_key, - entity_key_serialization_version=config.entity_key_serialization_version, + self._compute_row_key( + entity_key=entity_key, + feature_view_name=feature_view.name, + config=config, ) for entity_key in entity_keys ] @@ -76,30 +78,28 @@ def online_read( # TODO: read all the rows in a single call instead of reading them sequentially for row_key in row_keys: res = {} - # TODO: use filters to reduce the amount of data transfered and skip unnecessary columns. + # TODO: use filters to reduce the amount of data transfered and skip + # unnecessary columns. 
row = bt_table.read_row(row_key) if row is None: - continue + batch_result.append((None, None)) - for feature_name, feature_values in row.cells.get( - column_family_id, {} - ).items(): + row_values = row.cells[self.feature_column_family] + # TODO: check if we need created_ts anywhere + row_values.pop(b"created_ts") + event_ts = datetime.fromisoformat( + row_values.pop(b"event_ts")[0].value.decode() + ) + for feature_name, feature_values in row_values.items(): # We only want to retrieve the latest value for each feature feature_value = feature_values[0] val = ValueProto() val.ParseFromString(feature_value.value) res[feature_name.decode()] = val - batch_result.append((feature_value.timestamp, res)) - - result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] - - # Pad in case not all entities in a batch have responses - batch_size_nones = ((None, None),) * (len(row_keys) - len(batch_result)) - batch_result.extend(batch_size_nones) - result.extend(batch_result) - return result + batch_result.append((event_ts, res)) + return batch_result @log_exceptions_and_usage(online_store="bigtable") def online_write_batch( @@ -113,16 +113,16 @@ def online_write_batch( ) -> None: feature_view = table bt_table_name = self._get_table_name(config=config, feature_view=feature_view) - column_family_id = feature_view.name client = self._get_client(online_config=config.online_store) bt_instance = client.instance(instance_id=config.online_store.instance) bt_table = bt_instance.table(bt_table_name) - # `columns_per_row` is used to calculate the number of rows we are allowed to mutate in one - # request. Since `MUTATIONS_PER_OP` is set much lower than the max allowed value, the - # calculation of `columns_per_row` doesn't need to be precise. Feature views can have 1 or 2 - # timestamp fields: event timestamp and created timestamp. We assume 2 conservatively. + # `columns_per_row` is used to calculate the number of rows we are allowed to + # mutate in one request. 
Since `MUTATIONS_PER_OP` is set much lower than the max + # allowed value, the calculation of `columns_per_row` doesn't need to be + # precise. Feature views can have 1 or 2 timestamp fields: event timestamp and + # created timestamp. We assume 2 conservatively. columns_per_row = len(feature_view.features) + 2 # extra for 2 timestamps rows_per_write = MUTATIONS_PER_OP // columns_per_row @@ -137,39 +137,71 @@ def online_write_batch( self._write_rows_to_bt, rows_to_write=rows_to_write, bt_table=bt_table, - column_family_id=column_family_id, + feature_view_name=feature_view.name, config=config, + progress=progress, ) ) futures.wait(fs) - @staticmethod def _write_rows_to_bt( + self, rows_to_write: List[ Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]] ], bt_table: bigtable.table.Table, - column_family_id: str, + feature_view_name: str, config: RepoConfig, + progress: Optional[Callable[[int], Any]], ): rows = [] - for row in rows_to_write: - entity_key, features, timestamp, created_ts = row + for entity_key, features, timestamp, created_ts in rows_to_write: bt_row = bt_table.direct_row( - compute_entity_id( - entity_key, - entity_key_serialization_version=config.entity_key_serialization_version, + self._compute_row_key( + entity_key=entity_key, + feature_view_name=feature_view_name, + config=config, ) ) for feature_name, feature_value in features.items(): bt_row.set_cell( - column_family_id, feature_name, feature_value.SerializeToString() + self.feature_column_family, + feature_name.encode(), + feature_value.SerializeToString(), ) - # TODO: write timestamps during materialization as well + bt_row.set_cell( + self.feature_column_family, + b"event_ts", + utils.make_tzaware(timestamp).isoformat().encode(), + ) + bt_row.set_cell( + self.feature_column_family, + b"created_ts", + utils.make_tzaware(created_ts).isoformat().encode() + if created_ts is not None + else None, + ) rows.append(bt_row) bt_table.mutate_rows(rows) + if progress: + 
progress(len(rows)) + + def _compute_row_key( + self, entity_key: EntityKeyProto, feature_view_name: str, config: RepoConfig + ) -> str: + entity_id = compute_entity_id( + entity_key, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + # Even though `entity_id` uniquely identifies an entity, we use the same table + # for multiple feature_views with the same set of entities. To uniquely identify + # the row for a feature_view, we suffix the name of the feature_view itself. + # This also ensures that features for entities from various feature_views are + # colocated. + return f"{entity_id}#{feature_view_name}" + def update( self, config: RepoConfig, @@ -179,12 +211,11 @@ def update( entities_to_keep: Sequence[Entity], partial: bool, ): - """Creates the appropriate tables and column families in BigTable. - - We use a dedicated table for each entity combination. For example, if a FeatureView uses the entities `shop` and - `customer`, the resulting table would be called `customer-shop` (entities are sorted lexicographically first). + """Creates the appropriate tables and column family in BigTable. - FeatureViews are represented by column families in their respective tables. + We use a dedicated table for each entity combination. For example, if a + FeatureView uses the entities `shop` and `customer`, the resulting table would + be called `customer-shop` (entities are sorted lexicographically first). 
""" online_config = config.online_store assert isinstance(online_config, BigTableOnlineStoreConfig) @@ -193,41 +224,41 @@ def update( max_versions_gc_rule = bigtable.column_family.MaxVersionsGCRule( online_config.max_versions ) - - for feature_view in tables_to_keep: - table_name = self._get_table_name(config=config, feature_view=feature_view) - table = bt_instance.table(table_name) - if not table.exists(): - logger.info( - f"Creating table `{table_name}` in BigTable for feature view `{feature_view.name}`" - ) - table.create() + # The word "table" in the arguments refers to feature_views (this is for legacy + # reasons). To reduce confusion with bigtable tables, we use alternate variable + # names + feature_views_to_keep = tables_to_keep + feature_views_to_delete = tables_to_delete + # Multiple feature views can share the same tables. So just because a feature + # view has been deleted does not mean that we can just delete the table. We map + # feature views to BigTable table names and figure out which ones to create + # and/or delete. 
+ bt_tables_to_keep: Set[str] = { + self._get_table_name(config=config, feature_view=feature_view) + for feature_view in feature_views_to_keep + } + bt_tables_to_delete: Set[str] = { + self._get_table_name(config=config, feature_view=feature_view) + for feature_view in feature_views_to_delete + } - bt_tables_to_keep # we don't delete a table if it's in `bt_tables_to_keep` + + for bt_table_name in bt_tables_to_keep: + bt_table = bt_instance.table(bt_table_name) + if not bt_table.exists(): + logger.info(f"Creating table `{bt_table_name}` in BigTable") + bt_table.create() else: - logger.info(f"Table {table_name} already exists in BigTable") + logger.info(f"Table {bt_table_name} already exists in BigTable") - cfs = table.list_column_families() - if feature_view.name not in cfs: - table.column_family( - feature_view.name, gc_rule=max_versions_gc_rule + if self.feature_column_family not in bt_table.list_column_families(): + bt_table.column_family( + self.feature_column_family, gc_rule=max_versions_gc_rule ).create() - for feature_view in tables_to_delete: - table_name = self._get_table_name(config=config, feature_view=feature_view) - table = bt_instance.table(table_name) - cfs = table.list_column_families() - cf = cfs.pop(feature_view.name, None) - if cf is not None: - cf.delete() - else: - logger.warning( - f"Skipping deletion of column family `{feature_view.name}` in table `{table_name}` since it " - "doesn't exist. Perhaps it was deleted manually." - ) - if not cfs: - logger.info( - f"We've deleted all column families from the table `{table_name}`, so we're deleting it too." 
- ) - table.delete() + for bt_table_name in bt_tables_to_delete: + bt_table = bt_instance.table(bt_table_name) + logger.info(f"Deleting table {bt_table_name} in BigTable") + bt_table.delete() @staticmethod def _get_table_name(config: RepoConfig, feature_view: FeatureView) -> str: @@ -239,7 +270,8 @@ def teardown( tables: Sequence[FeatureView], entities: Sequence[Entity], ): - # Because of historical reasons, Feast calls them tables. We use this alias for readability. + # Because of historical reasons, Feast calls them tables. We use this alias for + # readability. feature_views = tables bt_tables = { From 2a0d09ac47e722e309e4f24667d2fb65d527d5c1 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Sat, 10 Sep 2022 18:24:13 -0400 Subject: [PATCH 06/24] Minor bugfixes. - If a row is empty when fetching data, don't process it more. - If a task in the threadpool fails, bubble up that failure. - If a `created_ts` is not available, use an empty string. `None` does not automatically serialize to bytes. 
Signed-off-by: Abhin Chhabra --- .../contrib/bigtable_online_store/bigtable.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py index 5d346e77e97..5aac0fff7d8 100644 --- a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py +++ b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py @@ -8,14 +8,13 @@ from pydantic import StrictStr from pydantic.typing import Literal -from feast import Entity, FeatureView, feature_view, utils +from feast import Entity, FeatureView, utils from feast.infra.online_stores.helpers import compute_entity_id from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto -from feast.protos.feast.types.Value_pb2 import ValueType from feast.repo_config import FeastConfigBaseModel, RepoConfig -from feast.usage import get_user_agent, log_exceptions_and_usage, tracing_span +from feast.usage import log_exceptions_and_usage logger = logging.getLogger(__name__) @@ -84,6 +83,7 @@ def online_read( if row is None: batch_result.append((None, None)) + continue row_values = row.cells[self.feature_column_family] # TODO: check if we need created_ts anywhere @@ -142,7 +142,14 @@ def online_write_batch( progress=progress, ) ) - futures.wait(fs) + done_tasks, not_done_tasks = futures.wait(fs) + for task in done_tasks: + # If a task raised an exception, this will raise it here as well + task.result() + if not_done_tasks: + raise RuntimeError( + f"Not all batches were written to BigTable: {not_done_tasks}" + ) def _write_rows_to_bt( self, @@ -180,7 +187,7 @@ def _write_rows_to_bt( b"created_ts", utils.make_tzaware(created_ts).isoformat().encode() if created_ts is not None 
- else None, + else b"", ) rows.append(bt_row) bt_table.mutate_rows(rows) @@ -294,8 +301,4 @@ def teardown( def _get_client( self, online_config: BigTableOnlineStoreConfig, admin: bool = False ): - if not self._client: - self._client = bigtable.Client( - project=online_config.project_id, admin=admin - ) - return self._client + return bigtable.Client(project=online_config.project_id, admin=admin) From 98532a50152d221c87b15a23cb48e048535a7798 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Tue, 27 Sep 2022 13:52:32 -0400 Subject: [PATCH 07/24] Move BigTable online store out of contrib As per feedback on the PR. Signed-off-by: Abhin Chhabra --- .../{contrib/bigtable_online_store => }/bigtable.py | 0 .../infra/online_stores/contrib/bigtable_online_store/README.md | 1 - .../online_stores/contrib/bigtable_online_store/__init__.py | 0 sdk/python/feast/repo_config.py | 2 +- 4 files changed, 1 insertion(+), 2 deletions(-) rename sdk/python/feast/infra/online_stores/{contrib/bigtable_online_store => }/bigtable.py (100%) delete mode 100644 sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/README.md delete mode 100644 sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/__init__.py diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py similarity index 100% rename from sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/bigtable.py rename to sdk/python/feast/infra/online_stores/bigtable.py diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/README.md b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/README.md deleted file mode 100644 index 5b0e8da2d75..00000000000 --- a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/README.md +++ /dev/null @@ -1 +0,0 @@ -# BigTable Online Store diff --git a/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/__init__.py 
b/sdk/python/feast/infra/online_stores/contrib/bigtable_online_store/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index a3e2090e93b..17f4177b4d2 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -48,11 +48,11 @@ "redis": "feast.infra.online_stores.redis.RedisOnlineStore", "dynamodb": "feast.infra.online_stores.dynamodb.DynamoDBOnlineStore", "snowflake.online": "feast.infra.online_stores.snowflake.SnowflakeOnlineStore", + "bigtable": "feast.infra.online_stores.bigtable.BigTableOnlineStore", "postgres": "feast.infra.online_stores.contrib.postgres.PostgreSQLOnlineStore", "hbase": "feast.infra.online_stores.contrib.hbase_online_store.hbase.HbaseOnlineStore", "cassandra": "feast.infra.online_stores.contrib.cassandra_online_store.cassandra_online_store.CassandraOnlineStore", "mysql": "feast.infra.online_stores.contrib.mysql_online_store.mysql.MySQLOnlineStore", - "bigtable": "feast.infra.online_stores.contrib.bigtable_online_store.bigtable.BigTableOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { From 2a65fefafefc8570ff1d60f9b5ada13dd1fcda5a Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Wed, 28 Sep 2022 09:03:36 -0400 Subject: [PATCH 08/24] Attempt to run integration tests in CI. Provide the GCP project and the bigtable instance ID for the tests to connect to. 
Signed-off-by: Abhin Chhabra --- .../tests/integration/feature_repos/repo_configuration.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index b116a3605bf..72e81098af2 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -87,6 +87,12 @@ "schema": "ONLINE", } +BIGTABLE_CONFIG = { + "type": "bigtable", + "project_id": os.getenv("GCLOUD_PROJECT", "kf-feast"), + "instance": os.getenv("BIGTABLE_INSTANCE_ID", "feast-integration-tests"), +} + OFFLINE_STORE_TO_PROVIDER_CONFIG: Dict[str, DataSourceCreator] = { "file": ("local", FileDataSourceCreator), "bigquery": ("gcp", BigQueryDataSourceCreator), @@ -118,7 +124,7 @@ AVAILABLE_ONLINE_STORES["dynamodb"] = (DYNAMO_CONFIG, None) AVAILABLE_ONLINE_STORES["datastore"] = ("datastore", None) AVAILABLE_ONLINE_STORES["snowflake"] = (SNOWFLAKE_CONFIG, None) - AVAILABLE_ONLINE_STORES["bigtable"] = ("bigtable", None) + AVAILABLE_ONLINE_STORES["bigtable"] = (BIGTABLE_CONFIG, None) full_repo_configs_module = os.environ.get(FULL_REPO_CONFIGS_MODULE_ENV_NAME) From de795f3e9c5326009196c0498a535dfd555f89e5 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Wed, 28 Sep 2022 10:04:15 -0400 Subject: [PATCH 09/24] Delete tables for entity-less feature views. 
Signed-off-by: Abhin Chhabra --- sdk/python/feast/infra/online_stores/bigtable.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/online_stores/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py index 5aac0fff7d8..3fcb6075844 100644 --- a/sdk/python/feast/infra/online_stores/bigtable.py +++ b/sdk/python/feast/infra/online_stores/bigtable.py @@ -9,6 +9,7 @@ from pydantic.typing import Literal from feast import Entity, FeatureView, utils +from feast.feature_view import DUMMY_ENTITY_NAME from feast.infra.online_stores.helpers import compute_entity_id from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto @@ -269,7 +270,12 @@ def update( @staticmethod def _get_table_name(config: RepoConfig, feature_view: FeatureView) -> str: - return f"{config.project}.{'-'.join(sorted(feature_view.entities))}" + entities_part = ( + "-".join(sorted(feature_view.entities)) + if feature_view.entities + else DUMMY_ENTITY_NAME + ) + return f"{config.project}.{entities_part}" def teardown( self, From bf798e841b5d33a67b802cc1ba1565c12d8d4992 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Wed, 28 Sep 2022 10:26:47 -0400 Subject: [PATCH 10/24] Table names should be smaller than 50 characters This is BigTable's table length limit and it's causing test failures. 
Signed-off-by: Abhin Chhabra --- .../feast/infra/online_stores/bigtable.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/online_stores/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py index 3fcb6075844..881ffeea9cb 100644 --- a/sdk/python/feast/infra/online_stores/bigtable.py +++ b/sdk/python/feast/infra/online_stores/bigtable.py @@ -1,3 +1,4 @@ +import hashlib import logging from concurrent import futures from datetime import datetime @@ -275,7 +276,30 @@ def _get_table_name(config: RepoConfig, feature_view: FeatureView) -> str: if feature_view.entities else DUMMY_ENTITY_NAME ) - return f"{config.project}.{entities_part}" + BIGTABLE_TABLE_MAX_LENGTH = 50 + ENTITIES_PART_MAX_LENGTH = 25 + # Bigtable limits table names to 50 characters. We'll limit the max size of + # the `entities_part` and if that's not enough, we'll just hash the + # entities_part. The remaining length is dedicated to the project name. This + # allows multiple projects to coexist in the same bigtable instance. This also + # allows multiple integration test executions to run simultaneously without + # conflicts. + if len(entities_part) > ENTITIES_PART_MAX_LENGTH: + entities_part = hashlib.md5(entities_part.encode()).hexdigest()[ + :ENTITIES_PART_MAX_LENGTH + ] + remaining_length = BIGTABLE_TABLE_MAX_LENGTH - len(entities_part) + if len(config.project) > remaining_length: + HUMAN_READABLE_PART_LENGTH = 10 + HASH_PART_LENGTH = remaining_length - HUMAN_READABLE_PART_LENGTH - 1 + project_part = ( + config.project[:HUMAN_READABLE_PART_LENGTH] + + "_" + + hashlib.md5(config.project.encode()).hexdigest()[:HASH_PART_LENGTH] + ) + else: + project_part = config.project + return f"{project_part}.{entities_part}" From eb3ab910f2d41758282dbfbe3363eb5b7e792d7b Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Wed, 28 Sep 2022 14:25:38 -0400 Subject: [PATCH 11/24] Optimize bigtable reads.
- Fetch all the rows in one bigtable fetch. - Get only the columns that are necessary (using a column regex filter). Signed-off-by: Abhin Chhabra --- .../feast/infra/online_stores/bigtable.py | 65 ++++++++++--------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py index 881ffeea9cb..7d0870d75f9 100644 --- a/sdk/python/feast/infra/online_stores/bigtable.py +++ b/sdk/python/feast/infra/online_stores/bigtable.py @@ -6,6 +6,7 @@ import google from google.cloud import bigtable +from google.cloud.bigtable import row_filters from pydantic import StrictStr from pydantic.typing import Literal @@ -57,6 +58,8 @@ def online_read( entity_keys: List[EntityKeyProto], requested_features: Optional[List[str]] = None, ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + # Potential performance improvement opportunity described in + # https://github.com/feast-dev/feast/issues/3259 feature_view = table bt_table_name = self._get_table_name(config=config, feature_view=feature_view) @@ -72,36 +75,40 @@ def online_read( for entity_key in entity_keys ] - batch_result: List[ - Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]] - ] = [] - - # TODO: read all the rows in a single call instead of reading them sequentially + row_set = bigtable.row_set.RowSet() for row_key in row_keys: - res = {} - # TODO: use filters to reduce the amount of data transfered and skip - # unnecessary columns. 
- row = bt_table.read_row(row_key) - - if row is None: - batch_result.append((None, None)) - continue - - row_values = row.cells[self.feature_column_family] - # TODO: check if we need created_ts anywhere - row_values.pop(b"created_ts") - event_ts = datetime.fromisoformat( - row_values.pop(b"event_ts")[0].value.decode() - ) - for feature_name, feature_values in row_values.items(): - # We only want to retrieve the latest value for each feature - feature_value = feature_values[0] - val = ValueProto() - val.ParseFromString(feature_value.value) - res[feature_name.decode()] = val + row_set.add_row_key(row_key) + rows = bt_table.read_rows( + row_set=row_set, + filter_=( + row_filters.ColumnQualifierRegexFilter( + f"^({'|'.join(requested_features)}|event_ts)$".encode() + ) + if requested_features + else None + ), + ) + + return [self._process_bt_row(row) for row in rows] + + def _process_bt_row( + self, row: bigtable.row.PartialRowData + ) -> Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]: + res = {} + + if row is None: + return (None, None) + + row_values = row.cells[self.feature_column_family] + event_ts = datetime.fromisoformat(row_values.pop(b"event_ts")[0].value.decode()) + for feature_name, feature_values in row_values.items(): + # We only want to retrieve the latest value for each feature + feature_value = feature_values[0] + val = ValueProto() + val.ParseFromString(feature_value.value) + res[feature_name.decode()] = val - batch_result.append((event_ts, res)) - return batch_result + return (event_ts, res) @log_exceptions_and_usage(online_store="bigtable") def online_write_batch( @@ -277,7 +284,7 @@ def _get_table_name(config: RepoConfig, feature_view: FeatureView) -> str: else DUMMY_ENTITY_NAME ) BIGTABLE_TABLE_MAX_LENGTH = 50 - ENTITIES_PART_MAX_LENGTH = 25 + ENTITIES_PART_MAX_LENGTH = 24 # Bigtable limits table names to 50 characters. We'll limit the max size of of # the `entities_part` and if that's not enough, we'll just hash the # entities_part. 
The remaining length is dedicated to the project name. This From 1383b7e23c9bc10eaa8ba22b83e8af28d8dd48ff Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Wed, 28 Sep 2022 15:21:09 -0400 Subject: [PATCH 12/24] dynamodb: switch to `mock_dynamodb` The latest rebuilding of requirements has upgraded the `moto` library past the `4.0.0` release, which has a couple of breaking changes. Specifically, the `mock_dynamodb2` decorator has been deprecated. See https://github.com/spulec/moto/blob/master/CHANGELOG.md#400 for more details. The actual PR (https://github.com/spulec/moto/pull/4919) mentions that it's because the `mock_dynamodb` decorator is now equivalent to the `mock_dynamodb2` decorator. Signed-off-by: Abhin Chhabra --- .../online_store/test_dynamodb_online_store.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py index 9dca44dc09f..6045dbc6ce0 100644 --- a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py +++ b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py @@ -3,7 +3,7 @@ import boto3 import pytest -from moto import mock_dynamodb2 +from moto import mock_dynamodb from feast.infra.offline_stores.file import FileOfflineStoreConfig from feast.infra.online_stores.dynamodb import ( @@ -159,7 +159,7 @@ def test_dynamodb_table_dynamodb_resource(): assert dynamodb_resource.meta.client.meta.endpoint_url == endpoint_url -@mock_dynamodb2 +@mock_dynamodb @pytest.mark.parametrize("n_samples", [5, 50, 100]) def test_dynamodb_online_store_online_read( repo_config, dynamodb_online_store, n_samples @@ -180,7 +180,7 @@ def test_dynamodb_online_store_online_read( assert [item[1] for item in returned_items] == list(features) -@mock_dynamodb2 +@mock_dynamodb @pytest.mark.parametrize("n_samples", [5, 50, 100]) def test_dynamodb_online_store_online_write_batch( 
repo_config, dynamodb_online_store, n_samples @@ -207,7 +207,7 @@ def test_dynamodb_online_store_online_write_batch( assert [item[1] for item in stored_items] == list(features) -@mock_dynamodb2 +@mock_dynamodb def test_dynamodb_online_store_update(repo_config, dynamodb_online_store): """Test DynamoDBOnlineStore update method.""" # create dummy table to keep @@ -236,7 +236,7 @@ def test_dynamodb_online_store_update(repo_config, dynamodb_online_store): assert existing_tables[0] == f"test_aws.{db_table_keep_name}" -@mock_dynamodb2 +@mock_dynamodb def test_dynamodb_online_store_teardown(repo_config, dynamodb_online_store): """Test DynamoDBOnlineStore teardown method.""" db_table_delete_name_one = f"{TABLE_NAME}_delete_teardown_1" @@ -262,7 +262,7 @@ def test_dynamodb_online_store_teardown(repo_config, dynamodb_online_store): assert len(existing_tables) == 0 -@mock_dynamodb2 +@mock_dynamodb def test_dynamodb_online_store_online_read_unknown_entity( repo_config, dynamodb_online_store ): @@ -301,7 +301,7 @@ def test_dynamodb_online_store_online_read_unknown_entity( assert returned_items[pos] == (None, None) -@mock_dynamodb2 +@mock_dynamodb def test_write_batch_non_duplicates(repo_config, dynamodb_online_store): """Test DynamoDBOnline Store deduplicate write batch request items.""" dynamodb_tbl = f"{TABLE_NAME}_batch_non_duplicates" @@ -321,7 +321,7 @@ def test_write_batch_non_duplicates(repo_config, dynamodb_online_store): assert len(returned_items) == len(data) -@mock_dynamodb2 +@mock_dynamodb def test_dynamodb_online_store_online_read_unknown_entity_end_of_batch( repo_config, dynamodb_online_store ): From 6986fa93646e7a5fb207c5951749d87e87a5f667 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Wed, 28 Sep 2022 20:53:50 -0400 Subject: [PATCH 13/24] minor: rename `BigTable` to `Bigtable` This matches the GCP docs. 
Signed-off-by: Abhin Chhabra --- .../feast/infra/online_stores/bigtable.py | 32 +++++++++---------- sdk/python/feast/repo_config.py | 2 +- .../feature_repos/repo_configuration.py | 4 +-- .../universal/online_store/bigtable.py | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py index 7d0870d75f9..0811ca562cb 100644 --- a/sdk/python/feast/infra/online_stores/bigtable.py +++ b/sdk/python/feast/infra/online_stores/bigtable.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -# Number of mutations per BigTable write operation we're aiming for. The official max is +# Number of mutations per Bigtable write operation we're aiming for. The official max is # 100K; we're being conservative. MUTATIONS_PER_OP = 50_000 # The Bigtable client library limits the connection pool size to 10. This imposes a @@ -29,8 +29,8 @@ BIGTABLE_CLIENT_CONNECTION_POOL_SIZE = 10 -class BigTableOnlineStoreConfig(FeastConfigBaseModel): - """Online store config for GCP BigTable""" +class BigtableOnlineStoreConfig(FeastConfigBaseModel): + """Online store config for GCP Bigtable""" type: Literal["bigtable"] = "bigtable" """Online store typee selector""" @@ -39,13 +39,13 @@ class BigTableOnlineStoreConfig(FeastConfigBaseModel): """(optional) GCP Project ID""" instance: StrictStr - """The BigTable instance's ID""" + """The Bigtable instance's ID""" max_versions: int = 2 """The number of historical versions of data that will be kept around.""" -class BigTableOnlineStore(OnlineStore): +class BigtableOnlineStore(OnlineStore): _client: Optional[bigtable.Client] = None feature_column_family: str = "features" @@ -157,7 +157,7 @@ def online_write_batch( task.result() if not_done_tasks: raise RuntimeError( - f"Not all batches were written to BigTable: {not_done_tasks}" + f"Not all batches were written to Bigtable: {not_done_tasks}" ) def _write_rows_to_bt( @@ -227,14 +227,14 @@ def update( 
entities_to_keep: Sequence[Entity], partial: bool, ): - """Creates the appropriate tables and column family in BigTable. + """Creates the appropriate tables and column family in Bigtable. We use a dedicated table for each entity combination. For example, if a FeatureView uses the entities `shop` and `customer`, the resulting table would be called `customer-shop` (entities are sorted lexicographically first). """ online_config = config.online_store - assert isinstance(online_config, BigTableOnlineStoreConfig) + assert isinstance(online_config, BigtableOnlineStoreConfig) client = self._get_client(online_config, admin=True) bt_instance = client.instance(instance_id=online_config.instance) max_versions_gc_rule = bigtable.column_family.MaxVersionsGCRule( @@ -247,7 +247,7 @@ def update( feature_views_to_delete = tables_to_delete # Multiple feature views can share the same tables. So just because a feature # view has been deleted does not mean that we can just delete the table. We map - # feature views to BigTable table names and figure out which ones to create + # feature views to Bigtable table names and figure out which ones to create # and/or delete. 
bt_tables_to_keep: Set[str] = { self._get_table_name(config=config, feature_view=feature_view) @@ -261,10 +261,10 @@ def update( for bt_table_name in bt_tables_to_keep: bt_table = bt_instance.table(bt_table_name) if not bt_table.exists(): - logger.info(f"Creating table `{bt_table_name}` in BigTable") + logger.info(f"Creating table `{bt_table_name}` in Bigtable") bt_table.create() else: - logger.info(f"Table {bt_table_name} already exists in BigTable") + logger.info(f"Table {bt_table_name} already exists in Bigtable") if self.feature_column_family not in bt_table.list_column_families(): bt_table.column_family( @@ -273,7 +273,7 @@ def update( for bt_table_name in bt_tables_to_delete: bt_table = bt_instance.table(bt_table_name) - logger.info(f"Deleting table {bt_table_name} in BigTable") + logger.info(f"Deleting table {bt_table_name} in Bigtable") bt_table.delete() @staticmethod @@ -295,7 +295,7 @@ def _get_table_name(config: RepoConfig, feature_view: FeatureView) -> str: entities_part = hashlib.md5(entities_part.encode()).hexdigest()[ :ENTITIES_PART_MAX_LENGTH ] - remaining_length = BIGTABLE_TABLE_MAX_LENGTH - len(entities_part) + remaining_length = BIGTABLE_TABLE_MAX_LENGTH - len(entities_part) - 1 if len(config.project) > remaining_length: HUMAN_READABLE_PART_LENGTH = 10 HASH_PART_LENGTH = remaining_length - HUMAN_READABLE_PART_LENGTH - 1 @@ -323,12 +323,12 @@ def teardown( } online_config = config.online_store - assert isinstance(online_config, BigTableOnlineStoreConfig) + assert isinstance(online_config, BigtableOnlineStoreConfig) client = self._get_client(online_config, admin=True) bt_instance = client.instance(instance_id=online_config.instance) for table_name in bt_tables: try: - logger.info(f"Deleting BigTable table `{table_name}`") + logger.info(f"Deleting Bigtable table `{table_name}`") bt_instance.table(table_name).delete() except google.api_core.exceptions.NotFound: logger.warning( @@ -336,6 +336,6 @@ def teardown( ) def _get_client( - self, 
online_config: BigTableOnlineStoreConfig, admin: bool = False + self, online_config: BigtableOnlineStoreConfig, admin: bool = False ): return bigtable.Client(project=online_config.project_id, admin=admin) diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 17f4177b4d2..673d039ff0b 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -48,7 +48,7 @@ "redis": "feast.infra.online_stores.redis.RedisOnlineStore", "dynamodb": "feast.infra.online_stores.dynamodb.DynamoDBOnlineStore", "snowflake.online": "feast.infra.online_stores.snowflake.SnowflakeOnlineStore", - "bigtable": "feast.infra.online_stores.bigtable.BigTableOnlineStore", + "bigtable": "feast.infra.online_stores.bigtable.BigtableOnlineStore", "postgres": "feast.infra.online_stores.contrib.postgres.PostgreSQLOnlineStore", "hbase": "feast.infra.online_stores.contrib.hbase_online_store.hbase.HbaseOnlineStore", "cassandra": "feast.infra.online_stores.contrib.cassandra_online_store.cassandra_online_store.CassandraOnlineStore", diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 72e81098af2..174b0b91ad1 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -52,7 +52,7 @@ create_pushable_feature_view, ) from tests.integration.feature_repos.universal.online_store.bigtable import ( - BigTableOnlineStoreCreator, + BigtableOnlineStoreCreator, ) from tests.integration.feature_repos.universal.online_store.datastore import ( DatastoreOnlineStoreCreator, @@ -171,7 +171,7 @@ "redis": (REDIS_CONFIG, RedisOnlineStoreCreator), "dynamodb": (DYNAMO_CONFIG, DynamoDBOnlineStoreCreator), "datastore": ("datastore", DatastoreOnlineStoreCreator), - "bigtable": ("bigtable", BigTableOnlineStoreCreator), + "bigtable": ("bigtable", BigtableOnlineStoreCreator), } for key, replacement 
in replacements.items(): diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py b/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py index c40457a8111..c06143e245b 100644 --- a/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py +++ b/sdk/python/tests/integration/feature_repos/universal/online_store/bigtable.py @@ -10,7 +10,7 @@ ) -class BigTableOnlineStoreCreator(OnlineStoreCreator): +class BigtableOnlineStoreCreator(OnlineStoreCreator): gcp_project = "test-project" host = "0.0.0.0" port = "8086" From 3cd76a89bbecf398ae16185998d70b26b4de0ec5 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Wed, 28 Sep 2022 20:55:07 -0400 Subject: [PATCH 14/24] Wrote some Bigtable documentation. Closely mirrors the docs for the other online stores. Signed-off-by: Abhin Chhabra --- docs/reference/online-stores/bigtable.md | 56 +++++++++++++++++++ docs/specs/online_store_format.md | 25 ++++++++- .../gcp/feature_repo/feature_store.yaml | 5 ++ 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 docs/reference/online-stores/bigtable.md diff --git a/docs/reference/online-stores/bigtable.md b/docs/reference/online-stores/bigtable.md new file mode 100644 index 00000000000..d03426e102b --- /dev/null +++ b/docs/reference/online-stores/bigtable.md @@ -0,0 +1,56 @@ +# Bigtable online store + +## Description + +The [Bigtable](https://cloud.google.com/bigtable) online store provides support for +materializing feature values into Cloud Bigtable. The data model used to store feature +values in Bigtable is described in more detail +[here](../../specs/online_store_format.md#google-bigtable-online-store-format). + +## Getting started + +In order to use this online store, you'll need to run `pip install 'feast[gcp]'`. You +can then get started with the command `feast init REPO_NAME -t gcp`. 
+ +## Example + +{% code title="feature_store.yaml" %} +```yaml +project: my_feature_repo +registry: data/registry.db +provider: gcp +online_store: + type: bigtable + project_id: my_gcp_project + instance: my_bigtable_instance +``` +{% endcode %} + +The full set of configuration options is available in +[BigtableOnlineStoreConfig](https://rtd.feast.dev/en/latest/#feast.infra.online_stores.bigtable.BigtableOnlineStoreConfig). + +## Functionality Matrix + +The set of functionality supported by online stores is described in detail [here](overview.md#functionality). +Below is a matrix indicating which functionality is supported by the Bigtable online store. + +| | Bigtable | +|-----------------------------------------------------------|----------| +| write feature values to the online store | yes | +| read feature values from the online store | yes | +| update infrastructure (e.g. tables) in the online store | yes | +| teardown infrastructure (e.g. tables) in the online store | yes | +| generate a plan of infrastructure changes | no | +| support for on-demand transforms | yes | +| readable by Python SDK | yes | +| readable by Java | no | +| readable by Go | no | +| support for entityless feature views | yes | +| support for concurrent writing to the same key | no | +| support for ttl (time to live) at retrieval | no | +| support for deleting expired data | no | +| collocated by feature view | yes | +| collocated by feature service | no | +| collocated by entity key | yes | + +To compare this set of functionality against other online stores, please see the full [functionality matrix](overview.md#functionality-matrix). 
diff --git a/docs/specs/online_store_format.md b/docs/specs/online_store_format.md index 5c3c545c8d6..4022138e319 100644 --- a/docs/specs/online_store_format.md +++ b/docs/specs/online_store_format.md @@ -92,6 +92,29 @@ Other types of entity keys are not supported in this version of the specificatio ![Datastore Online Example](datastore_online_example.png) +## Google Bigtable Online Store Format + +[Bigtable storage model](https://cloud.google.com/bigtable/docs/overview#storage-model) +consists of massively scalable tables, with each row keyed by a "row key". The rows in a +table are stored lexicographically sorted by this row key. + +We use the following structure to store feature data in Bigtable: + +* All feature data for an entity or a specific group of entities is stored in the same + table. The table name is derived by concatenating the lexicographically sorted names + of entities. +* This implementation only uses one column family per table, named `features`. +* Each row key is created by concatenating a hash derived from the specific entity keys + and the name of the feature view. Each row only stores feature values for a specific + feature view. This arrangement also means that feature values for a given group of + entities are colocated. +* The columns used in each row are named after the features in the feature view. + Bigtable is perfectly content being sparsely populated. +* By default, we store 1 historical value of each feature value. This can be configured + using the `max_versions` setting in `BigtableOnlineStoreConfig`. This implementation + of the online store does not have the ability to revert any given value to its old + self. To use the historical version, you'll have to use custom code. 
+ ## Cassandra/Astra DB Online Store Format ### Overview @@ -250,4 +273,4 @@ message BoolList { repeated bool val = 1; } -``` \ No newline at end of file +``` diff --git a/sdk/python/feast/templates/gcp/feature_repo/feature_store.yaml b/sdk/python/feast/templates/gcp/feature_repo/feature_store.yaml index 7d4096615ac..e3d9d1d1e61 100644 --- a/sdk/python/feast/templates/gcp/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/gcp/feature_repo/feature_store.yaml @@ -13,6 +13,11 @@ online_store: # type: datastore # project_id: my_gcp_project # namespace: my_datastore_namespace +# See https://docs.feast.dev/reference/online-stores/bigtable +#online_store: +# type: bigtable +# project_id: my_gcp_project +# instance: my_bigtable_instance # See https://docs.feast.dev/reference/online-stores/redis #online_store: # type: redis From c7449cc9daf15bfe116696d929746636087b5ba4 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Thu, 29 Sep 2022 21:04:46 -0400 Subject: [PATCH 15/24] Bugfix: Deal with missing row keys. It looks like the bigtable client will just skip over non-existent row keys. Signed-off-by: Abhin Chhabra --- sdk/python/feast/infra/online_stores/bigtable.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py index 0811ca562cb..4da882e0737 100644 --- a/sdk/python/feast/infra/online_stores/bigtable.py +++ b/sdk/python/feast/infra/online_stores/bigtable.py @@ -89,7 +89,14 @@ def online_read( ), ) - return [self._process_bt_row(row) for row in rows] + # The BigTable client library only returns rows for keys that are found. This + # means that it's our responsibility to match the returned rows to the original + # `row_keys` and make sure that we're returning a list of the same length as + # `entity_keys`. 
+ bt_rows_dict: Dict[str, bigtable.row.PartialRowData] = { + row.row_key: row for row in rows + } + return [self._process_bt_row(bt_rows_dict.get(row_key)) for row_key in row_keys] def _process_bt_row( self, row: bigtable.row.PartialRowData @@ -206,7 +213,7 @@ def _write_rows_to_bt( def _compute_row_key( self, entity_key: EntityKeyProto, feature_view_name: str, config: RepoConfig - ) -> str: + ) -> bytes: entity_id = compute_entity_id( entity_key, entity_key_serialization_version=config.entity_key_serialization_version, @@ -216,7 +223,7 @@ def _compute_row_key( # the row for a feature_view, we suffix the name of the feature_view itself. # This also ensures that features for entities from various feature_views are # colocated. - return f"{entity_id}#{feature_view_name}" + return f"{entity_id}#{feature_view_name}".encode() def update( self, From f3563127e4ceba209619dd799e32e174a5f13980 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Thu, 29 Sep 2022 21:32:41 -0400 Subject: [PATCH 16/24] Fix linting issues. Signed-off-by: Abhin Chhabra --- sdk/python/feast/infra/online_stores/bigtable.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py index 4da882e0737..a741764f3ad 100644 --- a/sdk/python/feast/infra/online_stores/bigtable.py +++ b/sdk/python/feast/infra/online_stores/bigtable.py @@ -93,13 +93,13 @@ def online_read( # means that it's our responsibility to match the returned rows to the original # `row_keys` and make sure that we're returning a list of the same length as # `entity_keys`. 
- bt_rows_dict: Dict[str, bigtable.row.PartialRowData] = { + bt_rows_dict: Dict[bytes, bigtable.row.PartialRowData] = { row.row_key: row for row in rows } return [self._process_bt_row(bt_rows_dict.get(row_key)) for row_key in row_keys] def _process_bt_row( - self, row: bigtable.row.PartialRowData + self, row: Optional[bigtable.row.PartialRowData] ) -> Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]: res = {} From ff62c6b68c273962a3409a0e32dc33d2bec3109b Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Fri, 30 Sep 2022 17:08:30 -0400 Subject: [PATCH 17/24] Generate requirements files. - As of version `1.49`, the various python packages in the [grpc repo](https://github.com/grpc/grpc/tree/master/src/python) require `protobuf>=4.21.3`. Unfortunately, this is incompatible with all versions of `tensorflow-metadata` (see [this issue](https://github.com/tensorflow/metadata/issues/37)). And since `piptools` doesn't backtrack during dependency resolution, the requirement files cannot be regenerated without adding an upper limit on these grpc libraries directly in `setup.py`. - The previous attempt to upgrade usages of the `mock_dynamodb2` decorator to the newest version failed. Since I'm not an expert in dynamodb, it made sense to just cap the test tool to the version already being used in CI. 
Signed-off-by: Abhin Chhabra --- .../requirements/py3.10-ci-requirements.txt | 187 +++++++++-------- .../requirements/py3.10-requirements.txt | 66 +++--- .../requirements/py3.8-ci-requirements.txt | 140 +++++++------ .../requirements/py3.8-requirements.txt | 44 ++-- .../requirements/py3.9-ci-requirements.txt | 193 ++++++++++-------- .../requirements/py3.9-requirements.txt | 66 +++--- .../test_dynamodb_online_store.py | 16 +- setup.py | 37 ++-- 8 files changed, 396 insertions(+), 353 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index c1656ee7e6f..f6ba49d2af7 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -14,13 +14,13 @@ adlfs==0.5.9 # via feast (setup.py) aiobotocore==2.1.2 # via s3fs -aiohttp==3.8.1 +aiohttp==3.8.3 # via # adlfs # aiobotocore # gcsfs # s3fs -aioitertools==0.10.0 +aioitertools==0.11.0 # via aiobotocore aiosignal==1.2.0 # via aiohttp @@ -56,7 +56,7 @@ attrs==22.1.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.25.0 +azure-core==1.25.1 # via # adlfs # azure-identity @@ -64,7 +64,7 @@ azure-core==1.25.0 # msrest azure-datalake-store==0.0.52 # via adlfs -azure-identity==1.10.0 +azure-identity==1.11.0 # via # adlfs # feast (setup.py) @@ -76,7 +76,7 @@ babel==2.10.3 # via sphinx backcall==0.2.0 # via ipython -black==22.6.0 +black==22.8.0 # via feast (setup.py) boto3==1.20.23 # via @@ -102,7 +102,7 @@ cachetools==5.2.0 # via google-auth cassandra-driver==3.25.0 # via feast (setup.py) -certifi==2022.6.15 +certifi==2022.9.24 # via # kubernetes # minio @@ -116,7 +116,7 @@ cffi==1.15.1 # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.12 +charset-normalizer==2.1.1 # via # aiohttp # requests @@ -131,13 +131,13 @@ click==8.1.3 # moreorless # pip-tools # uvicorn -cloudpickle==2.1.0 +cloudpickle==2.2.0 # via dask colorama==0.4.5 # via # feast (setup.py) # 
great-expectations -coverage[toml]==6.4.4 +coverage[toml]==6.5.0 # via pytest-cov cryptography==35.0.0 # via @@ -155,7 +155,7 @@ dask==2022.1.1 # via feast (setup.py) dataclasses==0.6 # via great-expectations -db-dtypes==1.0.3 +db-dtypes==1.0.4 # via google-cloud-bigquery decorator==5.1.1 # via @@ -169,7 +169,7 @@ dill==0.3.5.1 # via # feast (setup.py) # multiprocess -distlib==0.3.5 +distlib==0.3.6 # via virtualenv docker==6.0.0 # via @@ -183,19 +183,21 @@ entrypoints==0.4 # via altair execnet==1.9.0 # via pytest-xdist -executing==0.10.0 +executing==1.1.0 # via stack-data -fastapi==0.79.1 +fastapi==0.85.0 # via feast (setup.py) -fastavro==1.6.0 +fastavro==1.6.1 # via # feast (setup.py) # pandavro -fastjsonschema==2.16.1 +fastjsonschema==2.16.2 # via nbformat filelock==3.8.0 - # via virtualenv -firebase-admin==5.2.0 + # via + # snowflake-connector-python + # virtualenv +firebase-admin==5.4.0 # via feast (setup.py) fissix==21.11.13 # via bowler @@ -215,20 +217,21 @@ gcsfs==2022.1.0 # via feast (setup.py) geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.8.2 +google-api-core[grpc]==2.10.1 # via # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-core # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.57.0 +google-api-python-client==2.63.0 # via firebase-admin -google-auth==2.10.0 +google-auth==2.12.0 # via # gcsfs # google-api-core @@ -240,63 +243,71 @@ google-auth==2.10.0 # kubernetes google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.5.2 +google-auth-oauthlib==0.5.3 # via gcsfs -google-cloud-bigquery[pandas]==3.3.2 +google-cloud-bigquery[pandas]==3.3.3 # via feast (setup.py) -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.16.1 # via # feast (setup.py) # google-cloud-bigquery +google-cloud-bigtable==2.12.0 + # via feast (setup.py) 
google-cloud-core==2.3.2 # via # google-cloud-bigquery + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.8.1 # via feast (setup.py) -google-cloud-firestore==2.6.1 +google-cloud-firestore==2.7.0 # via firebase-admin google-cloud-storage==2.5.0 # via # feast (setup.py) # firebase-admin # gcsfs -google-crc32c==1.3.0 +google-crc32c==1.5.0 # via google-resumable-media -google-resumable-media==2.3.3 +google-resumable-media==2.4.0 # via # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos==1.56.4 +googleapis-common-protos[grpc]==1.56.4 # via # feast (setup.py) # google-api-core + # grpc-google-iam-v1 # grpcio-status # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) -greenlet==1.1.2 - # via sqlalchemy -grpcio==1.47.0 +grpc-google-iam-v1==0.12.4 + # via google-cloud-bigtable +grpcio==1.49.1 # via # feast (setup.py) # google-api-core # google-cloud-bigquery + # googleapis-common-protos + # grpc-google-iam-v1 # grpcio-reflection # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.47.0 +grpcio-reflection==1.48.2 # via feast (setup.py) -grpcio-status==1.47.0 - # via google-api-core -grpcio-testing==1.47.0 +grpcio-status==1.48.2 + # via + # feast (setup.py) + # google-api-core +grpcio-testing==1.48.2 # via feast (setup.py) -grpcio-tools==1.47.0 +grpcio-tools==1.48.2 # via feast (setup.py) -h11==0.13.0 +h11==0.14.0 # via uvicorn happybase==1.2.0 # via feast (setup.py) @@ -306,11 +317,11 @@ httplib2==0.20.4 # via # google-api-python-client # google-auth-httplib2 -httptools==0.4.0 +httptools==0.5.0 # via uvicorn -identify==2.5.3 +identify==2.5.5 # via pre-commit -idna==3.3 +idna==3.4 # via # anyio # requests @@ -322,7 +333,7 @@ importlib-metadata==4.12.0 # via great-expectations iniconfig==1.1.1 # via pytest -ipython==8.4.0 +ipython==8.5.0 # via great-expectations isodate==0.6.1 # via msrest @@ -345,7 +356,7 @@ jsonpatch==1.32 # via 
great-expectations jsonpointer==2.3 # via jsonpatch -jsonschema==4.13.0 +jsonschema==4.16.0 # via # altair # feast (setup.py) @@ -377,7 +388,7 @@ moreorless==0.4.0 # via bowler moto==3.1.18 # via feast (setup.py) -msal==1.18.0 +msal==1.19.0 # via # azure-identity # msal-extensions @@ -397,7 +408,7 @@ multidict==6.0.2 # yarl multiprocess==0.70.13 # via bytewax -mypy==0.971 +mypy==0.981 # via # feast (setup.py) # sqlalchemy @@ -409,11 +420,11 @@ mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 # via feast (setup.py) -nbformat==5.4.0 +nbformat==5.6.1 # via great-expectations nodeenv==1.7.0 # via pre-commit -numpy==1.23.2 +numpy==1.23.3 # via # altair # db-dtypes @@ -439,7 +450,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.4.3 +pandas==1.4.4 # via # altair # db-dtypes @@ -454,7 +465,7 @@ parso==0.8.3 # via jedi partd==1.3.0 # via dask -pathspec==0.9.0 +pathspec==0.10.1 # via black pbr==5.10.0 # via mock @@ -478,21 +489,23 @@ portalocker==2.5.1 # via msal-extensions pre-commit==2.20.0 # via feast (setup.py) -prompt-toolkit==3.0.30 +prompt-toolkit==3.0.31 # via ipython -proto-plus==1.22.0 +proto-plus==1.22.1 # via # feast (setup.py) # google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==3.20.2 +protobuf==3.20.3 # via # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # googleapis-common-protos @@ -519,7 +532,7 @@ py-cpuinfo==8.0.0 # via pytest-benchmark py4j==0.10.9.5 # via pyspark -pyarrow==6.0.1 +pyarrow==8.0.0 # via # db-dtypes # feast (setup.py) @@ -539,7 +552,7 @@ pycparser==2.21 # via cffi pycryptodomex==3.15.0 # via snowflake-connector-python -pydantic==1.9.2 +pydantic==1.10.2 # via # fastapi # feast (setup.py) @@ -550,7 +563,7 @@ pygments==2.13.0 # feast (setup.py) # ipython # sphinx -pyjwt[crypto]==2.4.0 +pyjwt[crypto]==2.5.0 # via # 
adal # msal @@ -562,7 +575,9 @@ pymysql==1.0.2 pyodbc==4.0.34 # via feast (setup.py) pyopenssl==22.0.0 - # via snowflake-connector-python + # via + # feast (setup.py) + # snowflake-connector-python pyparsing==2.4.7 # via # great-expectations @@ -572,7 +587,7 @@ pyrsistent==0.18.1 # via jsonschema pyspark==3.3.0 # via feast (setup.py) -pytest==7.1.2 +pytest==7.1.3 # via # feast (setup.py) # pytest-benchmark @@ -585,7 +600,7 @@ pytest==7.1.2 # pytest-xdist pytest-benchmark==3.4.1 # via feast (setup.py) -pytest-cov==3.0.0 +pytest-cov==4.0.0 # via feast (setup.py) pytest-forked==1.4.0 # via pytest-xdist @@ -608,7 +623,7 @@ python-dateutil==2.8.2 # kubernetes # moto # pandas -python-dotenv==0.20.0 +python-dotenv==0.21.0 # via uvicorn pytz==2022.2.1 # via @@ -666,7 +681,7 @@ s3fs==2022.1.0 # via feast (setup.py) s3transfer==0.5.2 # via boto3 -scipy==1.9.0 +scipy==1.9.1 # via great-expectations six==1.16.0 # via @@ -684,11 +699,11 @@ six==1.16.0 # msrestazure # pandavro # python-dateutil -sniffio==1.2.0 +sniffio==1.3.0 # via anyio snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==2.7.8 +snowflake-connector-python[pandas]==2.8.0 # via feast (setup.py) sphinx==4.3.2 # via @@ -708,23 +723,23 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -sqlalchemy[mypy]==1.4.40 +sqlalchemy[mypy]==1.4.41 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a25 +sqlalchemy2-stubs==0.0.2a27 # via sqlalchemy -stack-data==0.4.0 +stack-data==0.5.1 # via ipython -starlette==0.19.1 +starlette==0.20.4 # via fastapi tabulate==0.8.10 # via feast (setup.py) -tenacity==8.0.1 +tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.9.0 +tensorflow-metadata==1.10.0 # via feast (setup.py) -termcolor==1.1.0 +termcolor==2.0.1 # via great-expectations -testcontainers==3.6.1 +testcontainers==3.7.0 # via feast (setup.py) thriftpy2==0.4.14 # via happybase @@ -745,21 +760,23 @@ toolz==0.12.0 # altair # dask # partd -tqdm==4.64.0 
+tqdm==4.64.1 # via # feast (setup.py) # great-expectations -traitlets==5.3.0 +traitlets==5.4.0 # via # ipython # jupyter-core # matplotlib-inline # nbformat -trino==0.315.0 +trino==0.316.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-protobuf==3.19.22 +types-cryptography==3.3.23 + # via pyjwt +types-protobuf==3.20.4 # via # feast (setup.py) # mypy-protobuf @@ -769,17 +786,17 @@ types-python-dateutil==2.8.19 # via feast (setup.py) types-pytz==2022.2.1.0 # via feast (setup.py) -types-pyyaml==6.0.11 +types-pyyaml==6.0.12 # via feast (setup.py) -types-redis==4.3.14 +types-redis==4.3.21 # via feast (setup.py) -types-requests==2.28.9 +types-requests==2.28.11 # via feast (setup.py) -types-setuptools==65.1.0 +types-setuptools==65.4.0.0 # via feast (setup.py) types-tabulate==0.8.11 # via feast (setup.py) -types-urllib3==1.26.23 +types-urllib3==1.26.25 # via types-requests typing-extensions==4.3.0 # via @@ -787,14 +804,15 @@ typing-extensions==4.3.0 # great-expectations # mypy # pydantic + # snowflake-connector-python # sqlalchemy2-stubs -tzdata==2022.2 +tzdata==2022.4 # via pytz-deprecation-shim tzlocal==4.2 # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.11 +urllib3==1.26.12 # via # botocore # docker @@ -804,19 +822,20 @@ urllib3==1.26.11 # minio # requests # responses -uvicorn[standard]==0.18.2 + # snowflake-connector-python +uvicorn[standard]==0.18.3 # via feast (setup.py) -uvloop==0.16.0 +uvloop==0.17.0 # via uvicorn -virtualenv==20.16.3 +virtualenv==20.16.5 # via pre-commit volatile==2.1.0 # via bowler -watchfiles==0.16.1 +watchfiles==0.17.0 # via uvicorn wcwidth==0.2.5 # via prompt-toolkit -websocket-client==1.3.3 +websocket-client==1.4.1 # via # docker # kubernetes diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 91369309175..8b1928bb092 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ 
b/sdk/python/requirements/py3.10-requirements.txt @@ -20,9 +20,9 @@ bowler==0.9.0 # via feast (setup.py) cachetools==5.2.0 # via google-auth -certifi==2022.6.15 +certifi==2022.9.24 # via requests -charset-normalizer==2.1.0 +charset-normalizer==2.1.1 # via requests click==8.1.3 # via @@ -30,7 +30,7 @@ click==8.1.3 # feast (setup.py) # moreorless # uvicorn -cloudpickle==2.1.0 +cloudpickle==2.2.0 # via dask colorama==0.4.5 # via feast (setup.py) @@ -38,44 +38,42 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.5.1 # via feast (setup.py) -fastapi==0.79.1 +fastapi==0.85.0 # via feast (setup.py) -fastavro==1.6.0 +fastavro==1.6.1 # via # feast (setup.py) # pandavro fissix==21.11.13 # via bowler -fsspec==2022.7.1 +fsspec==2022.8.2 # via dask -google-api-core==2.8.2 +google-api-core==2.10.1 # via feast (setup.py) -google-auth==2.10.0 +google-auth==2.12.0 # via google-api-core googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core # tensorflow-metadata -greenlet==1.1.2 - # via sqlalchemy -grpcio==1.47.0 +grpcio==1.49.1 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.47.0 +grpcio-reflection==1.48.2 # via feast (setup.py) -h11==0.13.0 +h11==0.14.0 # via uvicorn -httptools==0.4.0 +httptools==0.5.0 # via uvicorn -idna==3.3 +idna==3.4 # via # anyio # requests jinja2==3.1.2 # via feast (setup.py) -jsonschema==4.13.0 +jsonschema==4.16.0 # via feast (setup.py) locket==1.0.0 # via partd @@ -85,11 +83,11 @@ mmh3==3.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==0.971 +mypy==0.981 # via sqlalchemy mypy-extensions==0.4.3 # via mypy -numpy==1.23.2 +numpy==1.23.3 # via # feast (setup.py) # pandas @@ -97,7 +95,7 @@ numpy==1.23.2 # pyarrow packaging==21.3 # via dask -pandas==1.4.3 +pandas==1.5.0 # via # feast (setup.py) # pandavro @@ -105,9 +103,9 @@ pandavro==1.5.2 # via feast (setup.py) partd==1.3.0 # via dask -proto-plus==1.22.0 +proto-plus==1.22.1 # via feast (setup.py) -protobuf==3.20.2 +protobuf==3.20.3 # via # feast 
(setup.py) # google-api-core @@ -123,7 +121,7 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.9.2 +pydantic==1.10.2 # via # fastapi # feast (setup.py) @@ -135,7 +133,7 @@ pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.20.0 +python-dotenv==0.21.0 # via uvicorn pytz==2022.2.1 # via pandas @@ -154,19 +152,19 @@ six==1.16.0 # grpcio # pandavro # python-dateutil -sniffio==1.2.0 +sniffio==1.3.0 # via anyio -sqlalchemy[mypy]==1.4.40 +sqlalchemy[mypy]==1.4.41 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a25 +sqlalchemy2-stubs==0.0.2a27 # via sqlalchemy -starlette==0.19.1 +starlette==0.20.4 # via fastapi tabulate==0.8.10 # via feast (setup.py) -tenacity==8.0.1 +tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.9.0 +tensorflow-metadata==1.10.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -176,7 +174,7 @@ toolz==0.12.0 # via # dask # partd -tqdm==4.64.0 +tqdm==4.64.1 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) @@ -185,15 +183,15 @@ typing-extensions==4.3.0 # mypy # pydantic # sqlalchemy2-stubs -urllib3==1.26.11 +urllib3==1.26.12 # via requests -uvicorn[standard]==0.18.2 +uvicorn[standard]==0.18.3 # via feast (setup.py) -uvloop==0.16.0 +uvloop==0.17.0 # via uvicorn volatile==2.1.0 # via bowler -watchfiles==0.16.1 +watchfiles==0.17.0 # via uvicorn websockets==10.3 # via uvicorn diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index dca0ba225ac..914fb198331 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -14,13 +14,13 @@ adlfs==0.5.9 # via feast (setup.py) aiobotocore==2.1.2 # via s3fs -aiohttp==3.8.1 +aiohttp==3.8.3 # via # adlfs # aiobotocore # gcsfs # s3fs -aioitertools==0.10.0 +aioitertools==0.11.0 # via aiobotocore aiosignal==1.2.0 # via aiohttp @@ -64,7 +64,7 @@ azure-core==1.25.1 # msrest 
azure-datalake-store==0.0.52 # via adlfs -azure-identity==1.10.0 +azure-identity==1.11.0 # via # adlfs # feast (setup.py) @@ -106,7 +106,7 @@ cachetools==5.2.0 # via google-auth cassandra-driver==3.25.0 # via feast (setup.py) -certifi==2022.6.15 +certifi==2022.9.24 # via # kubernetes # minio @@ -120,7 +120,7 @@ cffi==1.15.1 # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.12 +charset-normalizer==2.1.1 # via # aiohttp # requests @@ -135,13 +135,13 @@ click==8.1.3 # moreorless # pip-tools # uvicorn -cloudpickle==2.1.0 +cloudpickle==2.2.0 # via dask colorama==0.4.5 # via # feast (setup.py) # great-expectations -coverage[toml]==6.4.4 +coverage[toml]==6.5.0 # via pytest-cov cryptography==35.0.0 # via @@ -159,7 +159,7 @@ dask==2022.1.1 # via feast (setup.py) dataclasses==0.6 # via great-expectations -db-dtypes==1.0.3 +db-dtypes==1.0.4 # via google-cloud-bigquery decorator==5.1.1 # via @@ -187,19 +187,21 @@ entrypoints==0.4 # via altair execnet==1.9.0 # via pytest-xdist -executing==1.0.0 +executing==1.1.0 # via stack-data -fastapi==0.82.0 +fastapi==0.85.0 # via feast (setup.py) -fastavro==1.6.0 +fastavro==1.6.1 # via # feast (setup.py) # pandavro -fastjsonschema==2.16.1 +fastjsonschema==2.16.2 # via nbformat filelock==3.8.0 - # via virtualenv -firebase-admin==5.3.0 + # via + # snowflake-connector-python + # virtualenv +firebase-admin==5.4.0 # via feast (setup.py) fissix==21.11.13 # via bowler @@ -219,7 +221,7 @@ gcsfs==2022.1.0 # via feast (setup.py) geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.10.0 +google-api-core[grpc]==2.10.1 # via # feast (setup.py) # firebase-admin @@ -231,9 +233,9 @@ google-api-core[grpc]==2.10.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.58.0 +google-api-python-client==2.63.0 # via firebase-admin -google-auth==2.11.0 +google-auth==2.12.0 # via # gcsfs # google-api-core @@ -245,15 +247,15 @@ google-auth==2.11.0 # kubernetes 
google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.5.2 +google-auth-oauthlib==0.5.3 # via gcsfs -google-cloud-bigquery[pandas]==3.3.2 +google-cloud-bigquery[pandas]==3.3.3 # via feast (setup.py) -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.16.1 # via # feast (setup.py) # google-cloud-bigquery -google-cloud-bigtable==2.11.3 +google-cloud-bigtable==2.12.0 # via feast (setup.py) google-cloud-core==2.3.2 # via @@ -264,7 +266,7 @@ google-cloud-core==2.3.2 # google-cloud-storage google-cloud-datastore==2.8.1 # via feast (setup.py) -google-cloud-firestore==2.6.1 +google-cloud-firestore==2.7.0 # via firebase-admin google-cloud-storage==2.5.0 # via @@ -273,7 +275,7 @@ google-cloud-storage==2.5.0 # gcsfs google-crc32c==1.5.0 # via google-resumable-media -google-resumable-media==2.3.3 +google-resumable-media==2.4.0 # via # google-cloud-bigquery # google-cloud-storage @@ -288,7 +290,7 @@ great-expectations==0.14.13 # via feast (setup.py) grpc-google-iam-v1==0.12.4 # via google-cloud-bigtable -grpcio==1.48.1 +grpcio==1.49.1 # via # feast (setup.py) # google-api-core @@ -299,15 +301,17 @@ grpcio==1.48.1 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.48.1 +grpcio-reflection==1.48.2 # via feast (setup.py) -grpcio-status==1.48.1 - # via google-api-core -grpcio-testing==1.48.1 +grpcio-status==1.48.2 + # via + # feast (setup.py) + # google-api-core +grpcio-testing==1.48.2 # via feast (setup.py) -grpcio-tools==1.48.1 +grpcio-tools==1.48.2 # via feast (setup.py) -h11==0.13.0 +h11==0.14.0 # via uvicorn happybase==1.2.0 # via feast (setup.py) @@ -317,11 +321,11 @@ httplib2==0.20.4 # via # google-api-python-client # google-auth-httplib2 -httptools==0.4.0 +httptools==0.5.0 # via uvicorn -identify==2.5.3 +identify==2.5.5 # via pre-commit -idna==3.3 +idna==3.4 # via # anyio # requests @@ -335,7 +339,7 @@ importlib-resources==5.9.0 # via jsonschema iniconfig==1.1.1 # via pytest -ipython==8.4.0 +ipython==8.5.0 
# via great-expectations isodate==0.6.1 # via msrest @@ -358,7 +362,7 @@ jsonpatch==1.32 # via great-expectations jsonpointer==2.3 # via jsonpatch -jsonschema==4.15.0 +jsonschema==4.16.0 # via # altair # feast (setup.py) @@ -388,9 +392,9 @@ mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==4.0.2 +moto==3.1.18 # via feast (setup.py) -msal==1.18.0 +msal==1.19.0 # via # azure-identity # msal-extensions @@ -410,7 +414,7 @@ multidict==6.0.2 # yarl multiprocess==0.70.13 # via bytewax -mypy==0.971 +mypy==0.981 # via # feast (setup.py) # sqlalchemy @@ -422,11 +426,11 @@ mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 # via feast (setup.py) -nbformat==5.4.0 +nbformat==5.6.1 # via great-expectations nodeenv==1.7.0 # via pre-commit -numpy==1.23.2 +numpy==1.23.3 # via # altair # db-dtypes @@ -503,7 +507,7 @@ proto-plus==1.22.1 # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==3.20.2 +protobuf==3.20.3 # via # feast (setup.py) # google-api-core @@ -536,7 +540,7 @@ py-cpuinfo==8.0.0 # via pytest-benchmark py4j==0.10.9.5 # via pyspark -pyarrow==6.0.1 +pyarrow==8.0.0 # via # db-dtypes # feast (setup.py) @@ -556,7 +560,7 @@ pycparser==2.21 # via cffi pycryptodomex==3.15.0 # via snowflake-connector-python -pydantic==1.10.1 +pydantic==1.10.2 # via # fastapi # feast (setup.py) @@ -567,7 +571,7 @@ pygments==2.13.0 # feast (setup.py) # ipython # sphinx -pyjwt[crypto]==2.4.0 +pyjwt[crypto]==2.5.0 # via # adal # msal @@ -579,7 +583,9 @@ pymysql==1.0.2 pyodbc==4.0.34 # via feast (setup.py) pyopenssl==22.0.0 - # via snowflake-connector-python + # via + # feast (setup.py) + # snowflake-connector-python pyparsing==2.4.7 # via # great-expectations @@ -602,7 +608,7 @@ pytest==7.1.3 # pytest-xdist pytest-benchmark==3.4.1 # via feast (setup.py) -pytest-cov==3.0.0 +pytest-cov==4.0.0 # via feast (setup.py) pytest-forked==1.4.0 # via pytest-xdist @@ -707,7 +713,7 @@ sniffio==1.3.0 # via anyio snowballstemmer==2.2.0 # via sphinx 
-snowflake-connector-python[pandas]==2.7.8 +snowflake-connector-python[pandas]==2.8.0 # via feast (setup.py) sphinx==4.3.2 # via @@ -727,23 +733,23 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -sqlalchemy[mypy]==1.4.40 +sqlalchemy[mypy]==1.4.41 # via feast (setup.py) sqlalchemy2-stubs==0.0.2a27 # via sqlalchemy -stack-data==0.5.0 +stack-data==0.5.1 # via ipython -starlette==0.19.1 +starlette==0.20.4 # via fastapi tabulate==0.8.10 # via feast (setup.py) -tenacity==8.0.1 +tenacity==8.1.0 # via feast (setup.py) tensorflow-metadata==1.10.0 # via feast (setup.py) -termcolor==1.1.0 +termcolor==2.0.1 # via great-expectations -testcontainers==3.6.1 +testcontainers==3.7.0 # via feast (setup.py) thriftpy2==0.4.14 # via happybase @@ -768,17 +774,19 @@ tqdm==4.64.1 # via # feast (setup.py) # great-expectations -traitlets==5.3.0 +traitlets==5.4.0 # via # ipython # jupyter-core # matplotlib-inline # nbformat -trino==0.315.0 +trino==0.316.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-protobuf==3.20.1 +types-cryptography==3.3.23 + # via pyjwt +types-protobuf==3.20.4 # via # feast (setup.py) # mypy-protobuf @@ -788,17 +796,17 @@ types-python-dateutil==2.8.19 # via feast (setup.py) types-pytz==2022.2.1.0 # via feast (setup.py) -types-pyyaml==6.0.11 +types-pyyaml==6.0.12 # via feast (setup.py) -types-redis==4.3.20 +types-redis==4.3.21 # via feast (setup.py) -types-requests==2.28.9 +types-requests==2.28.11 # via feast (setup.py) -types-setuptools==65.3.0 +types-setuptools==65.4.0.0 # via feast (setup.py) types-tabulate==0.8.11 # via feast (setup.py) -types-urllib3==1.26.23 +types-urllib3==1.26.25 # via types-requests typing-extensions==4.3.0 # via @@ -808,9 +816,10 @@ typing-extensions==4.3.0 # great-expectations # mypy # pydantic + # snowflake-connector-python # sqlalchemy2-stubs # starlette -tzdata==2022.2 +tzdata==2022.4 # via pytz-deprecation-shim tzlocal==4.2 # via great-expectations @@ -826,15 +835,16 @@ 
urllib3==1.26.12 # minio # requests # responses + # snowflake-connector-python uvicorn[standard]==0.18.3 # via feast (setup.py) -uvloop==0.16.0 +uvloop==0.17.0 # via uvicorn -virtualenv==20.16.4 +virtualenv==20.16.5 # via pre-commit volatile==2.1.0 # via bowler -watchfiles==0.16.1 +watchfiles==0.17.0 # via uvicorn wcwidth==0.2.5 # via prompt-toolkit diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 3410af7585f..d2e40bcf397 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -20,7 +20,7 @@ bowler==0.9.0 # via feast (setup.py) cachetools==5.2.0 # via google-auth -certifi==2022.6.15 +certifi==2022.9.24 # via requests charset-normalizer==2.1.1 # via requests @@ -30,7 +30,7 @@ click==8.1.3 # feast (setup.py) # moreorless # uvicorn -cloudpickle==2.1.0 +cloudpickle==2.2.0 # via dask colorama==0.4.5 # via feast (setup.py) @@ -38,9 +38,9 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.5.1 # via feast (setup.py) -fastapi==0.82.0 +fastapi==0.85.0 # via feast (setup.py) -fastavro==1.6.0 +fastavro==1.6.1 # via # feast (setup.py) # pandavro @@ -48,26 +48,26 @@ fissix==21.11.13 # via bowler fsspec==2022.8.2 # via dask -google-api-core==2.10.0 +google-api-core==2.10.1 # via feast (setup.py) -google-auth==2.11.0 +google-auth==2.12.0 # via google-api-core googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.48.1 +grpcio==1.49.1 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.48.1 +grpcio-reflection==1.48.2 # via feast (setup.py) -h11==0.13.0 +h11==0.14.0 # via uvicorn -httptools==0.4.0 +httptools==0.5.0 # via uvicorn -idna==3.3 +idna==3.4 # via # anyio # requests @@ -75,7 +75,7 @@ importlib-resources==5.9.0 # via jsonschema jinja2==3.1.2 # via feast (setup.py) -jsonschema==4.15.0 +jsonschema==4.16.0 # via feast (setup.py) locket==1.0.0 # via partd @@ -85,11 +85,11 @@ mmh3==3.0.0 
# via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==0.971 +mypy==0.981 # via sqlalchemy mypy-extensions==0.4.3 # via mypy -numpy==1.23.2 +numpy==1.23.3 # via # feast (setup.py) # pandas @@ -97,7 +97,7 @@ numpy==1.23.2 # pyarrow packaging==21.3 # via dask -pandas==1.4.4 +pandas==1.5.0 # via # feast (setup.py) # pandavro @@ -109,7 +109,7 @@ pkgutil-resolve-name==1.3.10 # via jsonschema proto-plus==1.22.1 # via feast (setup.py) -protobuf==3.20.2 +protobuf==3.20.3 # via # feast (setup.py) # google-api-core @@ -125,7 +125,7 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.10.1 +pydantic==1.10.2 # via # fastapi # feast (setup.py) @@ -158,15 +158,15 @@ six==1.16.0 # python-dateutil sniffio==1.3.0 # via anyio -sqlalchemy[mypy]==1.4.40 +sqlalchemy[mypy]==1.4.41 # via feast (setup.py) sqlalchemy2-stubs==0.0.2a27 # via sqlalchemy -starlette==0.19.1 +starlette==0.20.4 # via fastapi tabulate==0.8.10 # via feast (setup.py) -tenacity==8.0.1 +tenacity==8.1.0 # via feast (setup.py) tensorflow-metadata==1.10.0 # via feast (setup.py) @@ -192,11 +192,11 @@ urllib3==1.26.12 # via requests uvicorn[standard]==0.18.3 # via feast (setup.py) -uvloop==0.16.0 +uvloop==0.17.0 # via uvicorn volatile==2.1.0 # via bowler -watchfiles==0.16.1 +watchfiles==0.17.0 # via uvicorn websockets==10.3 # via uvicorn diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 4893035a7e7..8e42d0dc79b 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -14,13 +14,13 @@ adlfs==0.5.9 # via feast (setup.py) aiobotocore==2.1.2 # via s3fs -aiohttp==3.8.1 +aiohttp==3.8.3 # via # adlfs # aiobotocore # gcsfs # s3fs -aioitertools==0.10.0 +aioitertools==0.11.0 # via aiobotocore aiosignal==1.2.0 # via aiohttp @@ -56,7 +56,7 @@ attrs==22.1.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.25.0 +azure-core==1.25.1 # via # adlfs # azure-identity @@ 
-64,7 +64,7 @@ azure-core==1.25.0 # msrest azure-datalake-store==0.0.52 # via adlfs -azure-identity==1.10.0 +azure-identity==1.11.0 # via # adlfs # feast (setup.py) @@ -76,7 +76,7 @@ babel==2.10.3 # via sphinx backcall==0.2.0 # via ipython -black==22.6.0 +black==22.8.0 # via feast (setup.py) boto3==1.20.23 # via @@ -102,7 +102,7 @@ cachetools==5.2.0 # via google-auth cassandra-driver==3.25.0 # via feast (setup.py) -certifi==2022.6.15 +certifi==2022.9.24 # via # kubernetes # minio @@ -116,7 +116,7 @@ cffi==1.15.1 # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.12 +charset-normalizer==2.1.1 # via # aiohttp # requests @@ -131,13 +131,13 @@ click==8.1.3 # moreorless # pip-tools # uvicorn -cloudpickle==2.1.0 +cloudpickle==2.2.0 # via dask colorama==0.4.5 # via # feast (setup.py) # great-expectations -coverage[toml]==6.4.4 +coverage[toml]==6.5.0 # via pytest-cov cryptography==35.0.0 # via @@ -155,7 +155,7 @@ dask==2022.1.1 # via feast (setup.py) dataclasses==0.6 # via great-expectations -db-dtypes==1.0.3 +db-dtypes==1.0.4 # via google-cloud-bigquery decorator==5.1.1 # via @@ -169,7 +169,7 @@ dill==0.3.5.1 # via # feast (setup.py) # multiprocess -distlib==0.3.5 +distlib==0.3.6 # via virtualenv docker==6.0.0 # via @@ -183,19 +183,21 @@ entrypoints==0.4 # via altair execnet==1.9.0 # via pytest-xdist -executing==0.10.0 +executing==1.1.0 # via stack-data -fastapi==0.79.1 +fastapi==0.85.0 # via feast (setup.py) -fastavro==1.6.0 +fastavro==1.6.1 # via # feast (setup.py) # pandavro -fastjsonschema==2.16.1 +fastjsonschema==2.16.2 # via nbformat filelock==3.8.0 - # via virtualenv -firebase-admin==5.2.0 + # via + # snowflake-connector-python + # virtualenv +firebase-admin==5.4.0 # via feast (setup.py) fissix==21.11.13 # via bowler @@ -215,20 +217,21 @@ gcsfs==2022.1.0 # via feast (setup.py) geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.8.2 +google-api-core[grpc]==2.10.1 # via # feast (setup.py) # firebase-admin # 
google-api-python-client # google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-core # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.57.0 +google-api-python-client==2.63.0 # via firebase-admin -google-auth==2.10.0 +google-auth==2.12.0 # via # gcsfs # google-api-core @@ -240,63 +243,71 @@ google-auth==2.10.0 # kubernetes google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.5.2 +google-auth-oauthlib==0.5.3 # via gcsfs -google-cloud-bigquery[pandas]==3.3.2 +google-cloud-bigquery[pandas]==3.3.3 # via feast (setup.py) -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.16.1 # via # feast (setup.py) # google-cloud-bigquery +google-cloud-bigtable==2.12.0 + # via feast (setup.py) google-cloud-core==2.3.2 # via # google-cloud-bigquery + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.8.1 # via feast (setup.py) -google-cloud-firestore==2.6.1 +google-cloud-firestore==2.7.0 # via firebase-admin google-cloud-storage==2.5.0 # via # feast (setup.py) # firebase-admin # gcsfs -google-crc32c==1.3.0 +google-crc32c==1.5.0 # via google-resumable-media -google-resumable-media==2.3.3 +google-resumable-media==2.4.0 # via # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos==1.56.4 +googleapis-common-protos[grpc]==1.56.4 # via # feast (setup.py) # google-api-core + # grpc-google-iam-v1 # grpcio-status # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) -greenlet==1.1.2 - # via sqlalchemy -grpcio==1.47.0 +grpc-google-iam-v1==0.12.4 + # via google-cloud-bigtable +grpcio==1.49.1 # via # feast (setup.py) # google-api-core # google-cloud-bigquery + # googleapis-common-protos + # grpc-google-iam-v1 # grpcio-reflection # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.47.0 +grpcio-reflection==1.48.2 # via feast 
(setup.py) -grpcio-status==1.47.0 - # via google-api-core -grpcio-testing==1.47.0 +grpcio-status==1.48.2 + # via + # feast (setup.py) + # google-api-core +grpcio-testing==1.48.2 # via feast (setup.py) -grpcio-tools==1.47.0 +grpcio-tools==1.48.2 # via feast (setup.py) -h11==0.13.0 +h11==0.14.0 # via uvicorn happybase==1.2.0 # via feast (setup.py) @@ -306,11 +317,11 @@ httplib2==0.20.4 # via # google-api-python-client # google-auth-httplib2 -httptools==0.4.0 +httptools==0.5.0 # via uvicorn -identify==2.5.3 +identify==2.5.5 # via pre-commit -idna==3.3 +idna==3.4 # via # anyio # requests @@ -322,7 +333,7 @@ importlib-metadata==4.12.0 # via great-expectations iniconfig==1.1.1 # via pytest -ipython==8.4.0 +ipython==8.5.0 # via great-expectations isodate==0.6.1 # via msrest @@ -345,7 +356,7 @@ jsonpatch==1.32 # via great-expectations jsonpointer==2.3 # via jsonpatch -jsonschema==4.13.0 +jsonschema==4.16.0 # via # altair # feast (setup.py) @@ -377,7 +388,7 @@ moreorless==0.4.0 # via bowler moto==3.1.18 # via feast (setup.py) -msal==1.18.0 +msal==1.19.0 # via # azure-identity # msal-extensions @@ -397,7 +408,7 @@ multidict==6.0.2 # yarl multiprocess==0.70.13 # via bytewax -mypy==0.971 +mypy==0.981 # via # feast (setup.py) # sqlalchemy @@ -409,11 +420,11 @@ mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 # via feast (setup.py) -nbformat==5.4.0 +nbformat==5.6.1 # via great-expectations nodeenv==1.7.0 # via pre-commit -numpy==1.23.2 +numpy==1.23.3 # via # altair # db-dtypes @@ -439,7 +450,7 @@ packaging==21.3 # pytest # redis # sphinx -pandas==1.4.3 +pandas==1.4.4 # via # altair # db-dtypes @@ -454,7 +465,7 @@ parso==0.8.3 # via jedi partd==1.3.0 # via dask -pathspec==0.9.0 +pathspec==0.10.1 # via black pbr==5.10.0 # via mock @@ -478,21 +489,23 @@ portalocker==2.5.1 # via msal-extensions pre-commit==2.20.0 # via feast (setup.py) -prompt-toolkit==3.0.30 +prompt-toolkit==3.0.31 # via ipython -proto-plus==1.22.0 +proto-plus==1.22.1 # via # feast (setup.py) # 
google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==3.20.2 +protobuf==3.20.3 # via # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage + # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # googleapis-common-protos @@ -519,7 +532,7 @@ py-cpuinfo==8.0.0 # via pytest-benchmark py4j==0.10.9.5 # via pyspark -pyarrow==6.0.1 +pyarrow==8.0.0 # via # db-dtypes # feast (setup.py) @@ -539,7 +552,7 @@ pycparser==2.21 # via cffi pycryptodomex==3.15.0 # via snowflake-connector-python -pydantic==1.9.2 +pydantic==1.10.2 # via # fastapi # feast (setup.py) @@ -550,7 +563,7 @@ pygments==2.13.0 # feast (setup.py) # ipython # sphinx -pyjwt[crypto]==2.4.0 +pyjwt[crypto]==2.5.0 # via # adal # msal @@ -562,7 +575,9 @@ pymysql==1.0.2 pyodbc==4.0.34 # via feast (setup.py) pyopenssl==22.0.0 - # via snowflake-connector-python + # via + # feast (setup.py) + # snowflake-connector-python pyparsing==2.4.7 # via # great-expectations @@ -572,7 +587,7 @@ pyrsistent==0.18.1 # via jsonschema pyspark==3.3.0 # via feast (setup.py) -pytest==7.1.2 +pytest==7.1.3 # via # feast (setup.py) # pytest-benchmark @@ -585,7 +600,7 @@ pytest==7.1.2 # pytest-xdist pytest-benchmark==3.4.1 # via feast (setup.py) -pytest-cov==3.0.0 +pytest-cov==4.0.0 # via feast (setup.py) pytest-forked==1.4.0 # via pytest-xdist @@ -608,7 +623,7 @@ python-dateutil==2.8.2 # kubernetes # moto # pandas -python-dotenv==0.20.0 +python-dotenv==0.21.0 # via uvicorn pytz==2022.2.1 # via @@ -660,15 +675,15 @@ responses==0.21.0 # via moto rsa==4.9 # via google-auth -ruamel.yaml==0.17.17 +ruamel-yaml==0.17.17 # via great-expectations -ruamel.yaml.clib==0.2.6 - # via ruamel.yaml +ruamel-yaml-clib==0.2.6 + # via ruamel-yaml s3fs==2022.1.0 # via feast (setup.py) s3transfer==0.5.2 # via boto3 -scipy==1.9.0 +scipy==1.9.1 # via great-expectations six==1.16.0 # via @@ -686,11 +701,11 @@ six==1.16.0 # 
msrestazure # pandavro # python-dateutil -sniffio==1.2.0 +sniffio==1.3.0 # via anyio snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==2.7.8 +snowflake-connector-python[pandas]==2.8.0 # via feast (setup.py) sphinx==4.3.2 # via @@ -710,23 +725,23 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -sqlalchemy[mypy]==1.4.40 +sqlalchemy[mypy]==1.4.41 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a25 +sqlalchemy2-stubs==0.0.2a27 # via sqlalchemy -stack-data==0.4.0 +stack-data==0.5.1 # via ipython -starlette==0.19.1 +starlette==0.20.4 # via fastapi tabulate==0.8.10 # via feast (setup.py) -tenacity==8.0.1 +tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.9.0 +tensorflow-metadata==1.10.0 # via feast (setup.py) -termcolor==1.1.0 +termcolor==2.0.1 # via great-expectations -testcontainers==3.6.1 +testcontainers==3.7.0 # via feast (setup.py) thriftpy2==0.4.14 # via happybase @@ -747,21 +762,23 @@ toolz==0.12.0 # altair # dask # partd -tqdm==4.64.0 +tqdm==4.64.1 # via # feast (setup.py) # great-expectations -traitlets==5.3.0 +traitlets==5.4.0 # via # ipython # jupyter-core # matplotlib-inline # nbformat -trino==0.315.0 +trino==0.316.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-protobuf==3.19.22 +types-cryptography==3.3.23 + # via pyjwt +types-protobuf==3.20.4 # via # feast (setup.py) # mypy-protobuf @@ -771,17 +788,17 @@ types-python-dateutil==2.8.19 # via feast (setup.py) types-pytz==2022.2.1.0 # via feast (setup.py) -types-pyyaml==6.0.11 +types-pyyaml==6.0.12 # via feast (setup.py) -types-redis==4.3.14 +types-redis==4.3.21 # via feast (setup.py) -types-requests==2.28.9 +types-requests==2.28.11 # via feast (setup.py) -types-setuptools==65.1.0 +types-setuptools==65.4.0.0 # via feast (setup.py) types-tabulate==0.8.11 # via feast (setup.py) -types-urllib3==1.26.23 +types-urllib3==1.26.25 # via types-requests typing-extensions==4.3.0 # via @@ -791,15 +808,16 @@ 
typing-extensions==4.3.0 # great-expectations # mypy # pydantic + # snowflake-connector-python # sqlalchemy2-stubs # starlette -tzdata==2022.2 +tzdata==2022.4 # via pytz-deprecation-shim tzlocal==4.2 # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.11 +urllib3==1.26.12 # via # botocore # docker @@ -809,19 +827,20 @@ urllib3==1.26.11 # minio # requests # responses -uvicorn[standard]==0.18.2 + # snowflake-connector-python +uvicorn[standard]==0.18.3 # via feast (setup.py) -uvloop==0.16.0 +uvloop==0.17.0 # via uvicorn -virtualenv==20.16.3 +virtualenv==20.16.5 # via pre-commit volatile==2.1.0 # via bowler -watchfiles==0.16.1 +watchfiles==0.17.0 # via uvicorn wcwidth==0.2.5 # via prompt-toolkit -websocket-client==1.3.3 +websocket-client==1.4.1 # via # docker # kubernetes diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 395302a9f9f..99b0215ffdb 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -20,9 +20,9 @@ bowler==0.9.0 # via feast (setup.py) cachetools==5.2.0 # via google-auth -certifi==2022.6.15 +certifi==2022.9.24 # via requests -charset-normalizer==2.1.0 +charset-normalizer==2.1.1 # via requests click==8.1.3 # via @@ -30,7 +30,7 @@ click==8.1.3 # feast (setup.py) # moreorless # uvicorn -cloudpickle==2.1.0 +cloudpickle==2.2.0 # via dask colorama==0.4.5 # via feast (setup.py) @@ -38,44 +38,42 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.5.1 # via feast (setup.py) -fastapi==0.79.1 +fastapi==0.85.0 # via feast (setup.py) -fastavro==1.6.0 +fastavro==1.6.1 # via # feast (setup.py) # pandavro fissix==21.11.13 # via bowler -fsspec==2022.7.1 +fsspec==2022.8.2 # via dask -google-api-core==2.8.2 +google-api-core==2.10.1 # via feast (setup.py) -google-auth==2.10.0 +google-auth==2.12.0 # via google-api-core googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core # tensorflow-metadata 
-greenlet==1.1.2 - # via sqlalchemy -grpcio==1.47.0 +grpcio==1.49.1 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.47.0 +grpcio-reflection==1.48.2 # via feast (setup.py) -h11==0.13.0 +h11==0.14.0 # via uvicorn -httptools==0.4.0 +httptools==0.5.0 # via uvicorn -idna==3.3 +idna==3.4 # via # anyio # requests jinja2==3.1.2 # via feast (setup.py) -jsonschema==4.13.0 +jsonschema==4.16.0 # via feast (setup.py) locket==1.0.0 # via partd @@ -85,11 +83,11 @@ mmh3==3.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==0.971 +mypy==0.981 # via sqlalchemy mypy-extensions==0.4.3 # via mypy -numpy==1.23.2 +numpy==1.23.3 # via # feast (setup.py) # pandas @@ -97,7 +95,7 @@ numpy==1.23.2 # pyarrow packaging==21.3 # via dask -pandas==1.4.3 +pandas==1.5.0 # via # feast (setup.py) # pandavro @@ -105,9 +103,9 @@ pandavro==1.5.2 # via feast (setup.py) partd==1.3.0 # via dask -proto-plus==1.22.0 +proto-plus==1.22.1 # via feast (setup.py) -protobuf==3.20.2 +protobuf==3.20.3 # via # feast (setup.py) # google-api-core @@ -123,7 +121,7 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.9.2 +pydantic==1.10.2 # via # fastapi # feast (setup.py) @@ -135,7 +133,7 @@ pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.20.0 +python-dotenv==0.21.0 # via uvicorn pytz==2022.2.1 # via pandas @@ -154,19 +152,19 @@ six==1.16.0 # grpcio # pandavro # python-dateutil -sniffio==1.2.0 +sniffio==1.3.0 # via anyio -sqlalchemy[mypy]==1.4.40 +sqlalchemy[mypy]==1.4.41 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a25 +sqlalchemy2-stubs==0.0.2a27 # via sqlalchemy -starlette==0.19.1 +starlette==0.20.4 # via fastapi tabulate==0.8.10 # via feast (setup.py) -tenacity==8.0.1 +tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.9.0 +tensorflow-metadata==1.10.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -176,7 +174,7 @@ toolz==0.12.0 # via # dask # partd -tqdm==4.64.0 +tqdm==4.64.1 # via feast 
(setup.py) typeguard==2.13.3 # via feast (setup.py) @@ -186,15 +184,15 @@ typing-extensions==4.3.0 # pydantic # sqlalchemy2-stubs # starlette -urllib3==1.26.11 +urllib3==1.26.12 # via requests -uvicorn[standard]==0.18.2 +uvicorn[standard]==0.18.3 # via feast (setup.py) -uvloop==0.16.0 +uvloop==0.17.0 # via uvicorn volatile==2.1.0 # via bowler -watchfiles==0.16.1 +watchfiles==0.17.0 # via uvicorn websockets==10.3 # via uvicorn diff --git a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py index 6045dbc6ce0..9dca44dc09f 100644 --- a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py +++ b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py @@ -3,7 +3,7 @@ import boto3 import pytest -from moto import mock_dynamodb +from moto import mock_dynamodb2 from feast.infra.offline_stores.file import FileOfflineStoreConfig from feast.infra.online_stores.dynamodb import ( @@ -159,7 +159,7 @@ def test_dynamodb_table_dynamodb_resource(): assert dynamodb_resource.meta.client.meta.endpoint_url == endpoint_url -@mock_dynamodb +@mock_dynamodb2 @pytest.mark.parametrize("n_samples", [5, 50, 100]) def test_dynamodb_online_store_online_read( repo_config, dynamodb_online_store, n_samples @@ -180,7 +180,7 @@ def test_dynamodb_online_store_online_read( assert [item[1] for item in returned_items] == list(features) -@mock_dynamodb +@mock_dynamodb2 @pytest.mark.parametrize("n_samples", [5, 50, 100]) def test_dynamodb_online_store_online_write_batch( repo_config, dynamodb_online_store, n_samples @@ -207,7 +207,7 @@ def test_dynamodb_online_store_online_write_batch( assert [item[1] for item in stored_items] == list(features) -@mock_dynamodb +@mock_dynamodb2 def test_dynamodb_online_store_update(repo_config, dynamodb_online_store): """Test DynamoDBOnlineStore update method.""" # create dummy table to keep @@ -236,7 +236,7 @@ def 
test_dynamodb_online_store_update(repo_config, dynamodb_online_store): assert existing_tables[0] == f"test_aws.{db_table_keep_name}" -@mock_dynamodb +@mock_dynamodb2 def test_dynamodb_online_store_teardown(repo_config, dynamodb_online_store): """Test DynamoDBOnlineStore teardown method.""" db_table_delete_name_one = f"{TABLE_NAME}_delete_teardown_1" @@ -262,7 +262,7 @@ def test_dynamodb_online_store_teardown(repo_config, dynamodb_online_store): assert len(existing_tables) == 0 -@mock_dynamodb +@mock_dynamodb2 def test_dynamodb_online_store_online_read_unknown_entity( repo_config, dynamodb_online_store ): @@ -301,7 +301,7 @@ def test_dynamodb_online_store_online_read_unknown_entity( assert returned_items[pos] == (None, None) -@mock_dynamodb +@mock_dynamodb2 def test_write_batch_non_duplicates(repo_config, dynamodb_online_store): """Test DynamoDBOnline Store deduplicate write batch request items.""" dynamodb_tbl = f"{TABLE_NAME}_batch_non_duplicates" @@ -321,7 +321,7 @@ def test_write_batch_non_duplicates(repo_config, dynamodb_online_store): assert len(returned_items) == len(data) -@mock_dynamodb +@mock_dynamodb2 def test_dynamodb_online_store_online_read_unknown_entity_end_of_batch( repo_config, dynamodb_online_store ): diff --git a/setup.py b/setup.py index ab7bb8b6e50..ca8c4fb63ce 100644 --- a/setup.py +++ b/setup.py @@ -53,13 +53,13 @@ "google-api-core>=1.23.0,<3", "googleapis-common-protos>=1.52.*,<2", "grpcio>=1.47.0,<2", - "grpcio-reflection>=1.47.0,<2", + "grpcio-reflection>=1.47.0,<1.49", "Jinja2>=2,<4", "jsonschema", "mmh3", "numpy>=1.22,<3", "pandas>=1.4.3,<2", - "pandavro==1.5.*", # For some reason pandavro higher than 1.5.* only support pandas less than 1.3. + "pandavro==1.5.*", # For some reason pandavro higher than 1.5.* only support pandas less than 1.3. 
"protobuf<5,>3", "proto-plus>=1.20.0,<2", "pyarrow>=4,<9", @@ -85,6 +85,7 @@ "google-cloud-datastore>=2.1.*,<3", "google-cloud-storage>=1.34.*,<3", "google-cloud-bigtable>=2.11.*,<3", + "grpcio-status<1.49", ] REDIS_REQUIRED = [ @@ -98,6 +99,10 @@ SNOWFLAKE_REQUIRED = [ "snowflake-connector-python[pandas]>=2.7.3,<3", + # `pyOpenSSL==22.1.0` requires `cryptography<39,>=38.0.0`, which is incompatible + # with `snowflake-connector-python[pandas]==2.8.0`, which depends on + # `cryptography<37.0.0,>=3.1.0`. + "pyOpenSSL<22.1.0", ] SPARK_REQUIRED = [ @@ -112,11 +117,7 @@ "psycopg2-binary>=2.8.3,<3", ] -MYSQL_REQUIRED = [ - "mysqlclient", - "pymysql", - "types-PyMySQL" -] +MYSQL_REQUIRED = ["mysqlclient", "pymysql", "types-PyMySQL"] HBASE_REQUIRED = [ "happybase>=1.2.0,<3", @@ -132,15 +133,13 @@ "cffi==1.15.*,<2", ] -AZURE_REQUIRED = ( - [ - "azure-storage-blob>=0.37.0", - "azure-identity>=1.6.1", - "SQLAlchemy>=1.4.19", - "pyodbc>=4.0.30", - "pymssql", - ] -) +AZURE_REQUIRED = [ + "azure-storage-blob>=0.37.0", + "azure-identity>=1.6.1", + "SQLAlchemy>=1.4.19", + "pyodbc>=4.0.30", + "pymssql", +] CI_REQUIRED = ( [ @@ -149,11 +148,11 @@ "flake8", "black>=22.6.0,<23", "isort>=5,<6", - "grpcio-tools>=1.47.0", - "grpcio-testing>=1.47.0", + "grpcio-tools>=1.47.0,<1.49", + "grpcio-testing>=1.47.0,<1.49", "minio==7.1.0", "mock==2.0.0", - "moto", + "moto<4", "mypy>=0.931", "mypy-protobuf==3.1", "avro==1.10.0", From cce36029655de90cc14f538d9ae2375a784b5d41 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Fri, 30 Sep 2022 17:20:11 -0400 Subject: [PATCH 18/24] Don't bother materializing created timestamp. Had a discussion with Danny about whether it's useful to copy this column. He agreed that there's no value to storing this in the online store. 
Signed-off-by: Abhin Chhabra --- sdk/python/feast/infra/online_stores/bigtable.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py index a741764f3ad..e08bc44bdbe 100644 --- a/sdk/python/feast/infra/online_stores/bigtable.py +++ b/sdk/python/feast/infra/online_stores/bigtable.py @@ -135,11 +135,8 @@ def online_write_batch( bt_table = bt_instance.table(bt_table_name) # `columns_per_row` is used to calculate the number of rows we are allowed to - # mutate in one request. Since `MUTATIONS_PER_OP` is set much lower than the max - # allowed value, the calculation of `columns_per_row` doesn't need to be - # precise. Feature views can have 1 or 2 timestamp fields: event timestamp and - # created timestamp. We assume 2 conservatively. - columns_per_row = len(feature_view.features) + 2 # extra for 2 timestamps + # mutate in one request. + columns_per_row = len(feature_view.features) + 1 # extra for event timestamp rows_per_write = MUTATIONS_PER_OP // columns_per_row with futures.ThreadPoolExecutor( @@ -198,13 +195,6 @@ def _write_rows_to_bt( b"event_ts", utils.make_tzaware(timestamp).isoformat().encode(), ) - bt_row.set_cell( - self.feature_column_family, - b"created_ts", - utils.make_tzaware(created_ts).isoformat().encode() - if created_ts is not None - else b"", - ) rows.append(bt_row) bt_table.mutate_rows(rows) From 943ee3f4e50a6797d6b7ae41d52402a4b6adb4cc Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Fri, 30 Sep 2022 18:11:02 -0400 Subject: [PATCH 19/24] Remove `tensorflow-metadata`. Turns out that this dependency is not required. We removed all references to it in [this PR](https://github.com/feast-dev/feast/pull/2063), but did not remove it from `setup.py`. Removing it has caused many of the restrictions imposed in previous commits to be unnecessary. 
Signed-off-by: Abhin Chhabra --- .../requirements/py3.10-ci-requirements.txt | 20 ++++++------------- .../requirements/py3.10-requirements.txt | 10 ++-------- .../requirements/py3.8-ci-requirements.txt | 20 ++++++------------- .../requirements/py3.8-requirements.txt | 10 ++-------- .../requirements/py3.9-ci-requirements.txt | 20 ++++++------------- .../requirements/py3.9-requirements.txt | 10 ++-------- setup.py | 8 +++----- 7 files changed, 27 insertions(+), 71 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index f6ba49d2af7..93729efc518 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -4,8 +4,6 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.10-ci-requirements.txt # -absl-py==1.2.0 - # via tensorflow-metadata adal==1.2.7 # via # azure-datalake-store @@ -281,7 +279,6 @@ googleapis-common-protos[grpc]==1.56.4 # google-api-core # grpc-google-iam-v1 # grpcio-status - # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) grpc-google-iam-v1==0.12.4 @@ -297,15 +294,13 @@ grpcio==1.49.1 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.48.2 +grpcio-reflection==1.49.1 # via feast (setup.py) -grpcio-status==1.48.2 - # via - # feast (setup.py) - # google-api-core -grpcio-testing==1.48.2 +grpcio-status==1.49.1 + # via google-api-core +grpcio-testing==1.49.1 # via feast (setup.py) -grpcio-tools==1.48.2 +grpcio-tools==1.49.1 # via feast (setup.py) h11==0.14.0 # via uvicorn @@ -499,7 +494,7 @@ proto-plus==1.22.1 # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==3.20.3 +protobuf==4.21.7 # via # feast (setup.py) # google-api-core @@ -515,7 +510,6 @@ protobuf==3.20.3 # grpcio-tools # mypy-protobuf # proto-plus - # tensorflow-metadata psutil==5.9.0 # via feast (setup.py) psycopg2-binary==2.9.3 @@ -735,8 +729,6 @@ tabulate==0.8.10 
# via feast (setup.py) tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.10.0 - # via feast (setup.py) termcolor==2.0.1 # via great-expectations testcontainers==3.7.0 diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 8b1928bb092..4770b765e50 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -4,8 +4,6 @@ # # pip-compile --output-file=sdk/python/requirements/py3.10-requirements.txt # -absl-py==1.2.0 - # via tensorflow-metadata anyio==3.6.1 # via # starlette @@ -56,12 +54,11 @@ googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core - # tensorflow-metadata grpcio==1.49.1 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.48.2 +grpcio-reflection==1.49.1 # via feast (setup.py) h11==0.14.0 # via uvicorn @@ -105,14 +102,13 @@ partd==1.3.0 # via dask proto-plus==1.22.1 # via feast (setup.py) -protobuf==3.20.3 +protobuf==4.21.7 # via # feast (setup.py) # google-api-core # googleapis-common-protos # grpcio-reflection # proto-plus - # tensorflow-metadata pyarrow==8.0.0 # via feast (setup.py) pyasn1==0.4.8 @@ -164,8 +160,6 @@ tabulate==0.8.10 # via feast (setup.py) tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.10.0 - # via feast (setup.py) toml==0.10.2 # via feast (setup.py) tomli==2.0.1 diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 914fb198331..e1b99210aa5 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -4,8 +4,6 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.8-ci-requirements.txt # -absl-py==1.2.0 - # via tensorflow-metadata adal==1.2.7 # via # azure-datalake-store @@ -285,7 +283,6 @@ googleapis-common-protos[grpc]==1.56.4 # google-api-core # grpc-google-iam-v1 # grpcio-status - # tensorflow-metadata 
great-expectations==0.14.13 # via feast (setup.py) grpc-google-iam-v1==0.12.4 @@ -301,15 +298,13 @@ grpcio==1.49.1 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.48.2 +grpcio-reflection==1.49.1 # via feast (setup.py) -grpcio-status==1.48.2 - # via - # feast (setup.py) - # google-api-core -grpcio-testing==1.48.2 +grpcio-status==1.49.1 + # via google-api-core +grpcio-testing==1.49.1 # via feast (setup.py) -grpcio-tools==1.48.2 +grpcio-tools==1.49.1 # via feast (setup.py) h11==0.14.0 # via uvicorn @@ -507,7 +502,7 @@ proto-plus==1.22.1 # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==3.20.3 +protobuf==4.21.7 # via # feast (setup.py) # google-api-core @@ -523,7 +518,6 @@ protobuf==3.20.3 # grpcio-tools # mypy-protobuf # proto-plus - # tensorflow-metadata psutil==5.9.0 # via feast (setup.py) psycopg2-binary==2.9.3 @@ -745,8 +739,6 @@ tabulate==0.8.10 # via feast (setup.py) tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.10.0 - # via feast (setup.py) termcolor==2.0.1 # via great-expectations testcontainers==3.7.0 diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index d2e40bcf397..9a321bf9a50 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -4,8 +4,6 @@ # # pip-compile --output-file=sdk/python/requirements/py3.8-requirements.txt # -absl-py==1.2.0 - # via tensorflow-metadata anyio==3.6.1 # via # starlette @@ -56,12 +54,11 @@ googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core - # tensorflow-metadata grpcio==1.49.1 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.48.2 +grpcio-reflection==1.49.1 # via feast (setup.py) h11==0.14.0 # via uvicorn @@ -109,14 +106,13 @@ pkgutil-resolve-name==1.3.10 # via jsonschema proto-plus==1.22.1 # via feast (setup.py) -protobuf==3.20.3 +protobuf==4.21.7 # via # feast (setup.py) # google-api-core # 
googleapis-common-protos # grpcio-reflection # proto-plus - # tensorflow-metadata pyarrow==8.0.0 # via feast (setup.py) pyasn1==0.4.8 @@ -168,8 +164,6 @@ tabulate==0.8.10 # via feast (setup.py) tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.10.0 - # via feast (setup.py) toml==0.10.2 # via feast (setup.py) tomli==2.0.1 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 8e42d0dc79b..af7b79ea229 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -4,8 +4,6 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.9-ci-requirements.txt # -absl-py==1.2.0 - # via tensorflow-metadata adal==1.2.7 # via # azure-datalake-store @@ -281,7 +279,6 @@ googleapis-common-protos[grpc]==1.56.4 # google-api-core # grpc-google-iam-v1 # grpcio-status - # tensorflow-metadata great-expectations==0.14.13 # via feast (setup.py) grpc-google-iam-v1==0.12.4 @@ -297,15 +294,13 @@ grpcio==1.49.1 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.48.2 +grpcio-reflection==1.49.1 # via feast (setup.py) -grpcio-status==1.48.2 - # via - # feast (setup.py) - # google-api-core -grpcio-testing==1.48.2 +grpcio-status==1.49.1 + # via google-api-core +grpcio-testing==1.49.1 # via feast (setup.py) -grpcio-tools==1.48.2 +grpcio-tools==1.49.1 # via feast (setup.py) h11==0.14.0 # via uvicorn @@ -499,7 +494,7 @@ proto-plus==1.22.1 # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==3.20.3 +protobuf==4.21.7 # via # feast (setup.py) # google-api-core @@ -515,7 +510,6 @@ protobuf==3.20.3 # grpcio-tools # mypy-protobuf # proto-plus - # tensorflow-metadata psutil==5.9.0 # via feast (setup.py) psycopg2-binary==2.9.3 @@ -737,8 +731,6 @@ tabulate==0.8.10 # via feast (setup.py) tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.10.0 - # via feast (setup.py) termcolor==2.0.1 # via 
great-expectations testcontainers==3.7.0 diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 99b0215ffdb..3b5135b602d 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -4,8 +4,6 @@ # # pip-compile --output-file=sdk/python/requirements/py3.9-requirements.txt # -absl-py==1.2.0 - # via tensorflow-metadata anyio==3.6.1 # via # starlette @@ -56,12 +54,11 @@ googleapis-common-protos==1.56.4 # via # feast (setup.py) # google-api-core - # tensorflow-metadata grpcio==1.49.1 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.48.2 +grpcio-reflection==1.49.1 # via feast (setup.py) h11==0.14.0 # via uvicorn @@ -105,14 +102,13 @@ partd==1.3.0 # via dask proto-plus==1.22.1 # via feast (setup.py) -protobuf==3.20.3 +protobuf==4.21.7 # via # feast (setup.py) # google-api-core # googleapis-common-protos # grpcio-reflection # proto-plus - # tensorflow-metadata pyarrow==8.0.0 # via feast (setup.py) pyasn1==0.4.8 @@ -164,8 +160,6 @@ tabulate==0.8.10 # via feast (setup.py) tenacity==8.1.0 # via feast (setup.py) -tensorflow-metadata==1.10.0 - # via feast (setup.py) toml==0.10.2 # via feast (setup.py) tomli==2.0.1 diff --git a/setup.py b/setup.py index ca8c4fb63ce..2b09bcbe946 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "google-api-core>=1.23.0,<3", "googleapis-common-protos>=1.52.*,<2", "grpcio>=1.47.0,<2", - "grpcio-reflection>=1.47.0,<1.49", + "grpcio-reflection>=1.47.0,<2", "Jinja2>=2,<4", "jsonschema", "mmh3", @@ -74,7 +74,6 @@ "typeguard", "fastapi>=0.68.0,<1", "uvicorn[standard]>=0.14.0,<1", - "tensorflow-metadata>=1.0.0,<2.0.0", "dask>=2021.*,<2022.02.0", "bowler", # Needed for automatic repo upgrades ] @@ -85,7 +84,6 @@ "google-cloud-datastore>=2.1.*,<3", "google-cloud-storage>=1.34.*,<3", "google-cloud-bigtable>=2.11.*,<3", - "grpcio-status<1.49", ] REDIS_REQUIRED = [ @@ -148,8 +146,8 @@ "flake8", "black>=22.6.0,<23", 
"isort>=5,<6", - "grpcio-tools>=1.47.0,<1.49", - "grpcio-testing>=1.47.0,<1.49", + "grpcio-tools>=1.47.0", + "grpcio-testing>=1.47.0", "minio==7.1.0", "mock==2.0.0", "moto<4", From ab80b429c5d7e2933f53d8cff444230e428c5196 Mon Sep 17 00:00:00 2001 From: Abhin Chhabra Date: Wed, 5 Oct 2022 09:48:27 -0400 Subject: [PATCH 20/24] Minor fix to Bigtable documentation. Feedback from Danny mentioned that Bigtable should be able to store multiple versions of the same key and fetch the latest at read time. This makes sense and means that concurrent writes should work just fine. Signed-off-by: Abhin Chhabra --- docs/reference/online-stores/bigtable.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/online-stores/bigtable.md b/docs/reference/online-stores/bigtable.md index d03426e102b..0d6e7cfb13b 100644 --- a/docs/reference/online-stores/bigtable.md +++ b/docs/reference/online-stores/bigtable.md @@ -46,7 +46,7 @@ Below is a matrix indicating which functionality is supported by the Bigtable on | readable by Java | no | | readable by Go | no | | support for entityless feature views | yes | -| support for concurrent writing to the same key | no | +| support for concurrent writing to the same key | yes | | support for ttl (time to live) at retrieval | no | | support for deleting expired data | no | | collocated by feature view | yes | From 4755745bf5e0b02a6ea837581e9868a7a2912e9c Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Wed, 5 Oct 2022 10:05:48 -0400 Subject: [PATCH 21/24] update roadmap docs Signed-off-by: Danny Chiao --- README.md | 2 +- docs/SUMMARY.md | 1 + docs/reference/online-stores/README.md | 4 ++++ docs/roadmap.md | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 182637018f1..e08bb5caa2e 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ The list below contains the functionality that contributors are planning to deve * [x] [Postgres (contrib 
+{% content-ref url="bigtable.md" %}
+[bigtable.md](bigtable.md)
+{% endcontent-ref %}
[Postgres (contrib plugin)](https://docs.feast.dev/reference/online-stores/postgres) * [x] [Custom online store support](https://docs.feast.dev/how-to-guides/adding-support-for-a-new-online-store) * [x] [Cassandra / AstraDB](https://docs.feast.dev/reference/online-stores/cassandra) - * [ ] Bigtable (in progress) + * [x] [Bigtable] (https://docs.feast.dev/reference/online-stores/bigtable) * **Feature Engineering** * [x] On-demand Transformations (Alpha release. See [RFC](https://docs.google.com/document/d/1lgfIw0Drc65LpaxbUu49RCeJgMew547meSJttnUqz7c/edit#)) * [x] Streaming Transformations (Alpha release. See [RFC](https://docs.google.com/document/d/1UzEyETHUaGpn0ap4G82DHluiCj7zEbrQLkJJkKSv4e8/edit)) From c0f2d8e7b2377365dbb060c35823e983b92633b9 Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Wed, 5 Oct 2022 13:23:59 -0400 Subject: [PATCH 22/24] Fix roadmap doc Signed-off-by: Danny Chiao --- README.md | 2 +- docs/roadmap.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e08bb5caa2e..fc0830ce121 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ The list below contains the functionality that contributors are planning to deve * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/online-stores/postgres) * [x] [Custom online store support](https://docs.feast.dev/how-to-guides/adding-support-for-a-new-online-store) * [x] [Cassandra / AstraDB](https://docs.feast.dev/reference/online-stores/cassandra) - * [x] [Bigtable] (https://docs.feast.dev/reference/online-stores/bigtable) + * [x] [Bigtable](https://docs.feast.dev/reference/online-stores/bigtable) * **Feature Engineering** * [x] On-demand Transformations (Alpha release. See [RFC](https://docs.google.com/document/d/1lgfIw0Drc65LpaxbUu49RCeJgMew547meSJttnUqz7c/edit#)) * [x] Streaming Transformations (Alpha release. 
See [RFC](https://docs.google.com/document/d/1UzEyETHUaGpn0ap4G82DHluiCj7zEbrQLkJJkKSv4e8/edit)) diff --git a/docs/roadmap.md b/docs/roadmap.md index c69242baca8..95efccb665a 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -36,7 +36,7 @@ The list below contains the functionality that contributors are planning to deve * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/online-stores/postgres) * [x] [Custom online store support](https://docs.feast.dev/how-to-guides/adding-support-for-a-new-online-store) * [x] [Cassandra / AstraDB](https://docs.feast.dev/reference/online-stores/cassandra) - * [x] [Bigtable] (https://docs.feast.dev/reference/online-stores/bigtable) + * [x] [Bigtable](https://docs.feast.dev/reference/online-stores/bigtable) * **Feature Engineering** * [x] On-demand Transformations (Alpha release. See [RFC](https://docs.google.com/document/d/1lgfIw0Drc65LpaxbUu49RCeJgMew547meSJttnUqz7c/edit#)) * [x] Streaming Transformations (Alpha release. See [RFC](https://docs.google.com/document/d/1UzEyETHUaGpn0ap4G82DHluiCj7zEbrQLkJJkKSv4e8/edit)) From 2d6bdac28bccfac1d427dfcd9ae9774c89bec947 Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Wed, 5 Oct 2022 13:26:39 -0400 Subject: [PATCH 23/24] Change link to point to roadmap page Signed-off-by: Danny Chiao --- docs/getting-started/third-party-integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting-started/third-party-integrations.md b/docs/getting-started/third-party-integrations.md index 8e6a600aa05..ca974880ed3 100644 --- a/docs/getting-started/third-party-integrations.md +++ b/docs/getting-started/third-party-integrations.md @@ -11,7 +11,7 @@ Don't see your offline store or online store of choice here? 
Check out our guide ## Integrations -See [Functionality and Roadmap](../../#-functionality-and-roadmap) +See [Functionality and Roadmap](../roadmap.md) ## Standards From 992c318a18d58ceb94e99f5469cd6b631d485d21 Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Wed, 5 Oct 2022 13:29:45 -0400 Subject: [PATCH 24/24] change order in roadmap Signed-off-by: Danny Chiao --- README.md | 4 ++-- docs/roadmap.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fc0830ce121..d57247c60b5 100644 --- a/README.md +++ b/README.md @@ -173,12 +173,12 @@ The list below contains the functionality that contributors are planning to deve * [x] [DynamoDB](https://docs.feast.dev/reference/online-stores/dynamodb) * [x] [Redis](https://docs.feast.dev/reference/online-stores/redis) * [x] [Datastore](https://docs.feast.dev/reference/online-stores/datastore) + * [x] [Bigtable](https://docs.feast.dev/reference/online-stores/bigtable) * [x] [SQLite](https://docs.feast.dev/reference/online-stores/sqlite) * [x] [Azure Cache for Redis (community plugin)](https://github.com/Azure/feast-azure) * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/online-stores/postgres) + * [x] [Cassandra / AstraDB (contrib plugin)](https://docs.feast.dev/reference/online-stores/cassandra) * [x] [Custom online store support](https://docs.feast.dev/how-to-guides/adding-support-for-a-new-online-store) - * [x] [Cassandra / AstraDB](https://docs.feast.dev/reference/online-stores/cassandra) - * [x] [Bigtable](https://docs.feast.dev/reference/online-stores/bigtable) * **Feature Engineering** * [x] On-demand Transformations (Alpha release. See [RFC](https://docs.google.com/document/d/1lgfIw0Drc65LpaxbUu49RCeJgMew547meSJttnUqz7c/edit#)) * [x] Streaming Transformations (Alpha release. 
See [RFC](https://docs.google.com/document/d/1UzEyETHUaGpn0ap4G82DHluiCj7zEbrQLkJJkKSv4e8/edit)) diff --git a/docs/roadmap.md b/docs/roadmap.md index 95efccb665a..cea646a8f5f 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -31,12 +31,12 @@ The list below contains the functionality that contributors are planning to deve * [x] [DynamoDB](https://docs.feast.dev/reference/online-stores/dynamodb) * [x] [Redis](https://docs.feast.dev/reference/online-stores/redis) * [x] [Datastore](https://docs.feast.dev/reference/online-stores/datastore) + * [x] [Bigtable](https://docs.feast.dev/reference/online-stores/bigtable) * [x] [SQLite](https://docs.feast.dev/reference/online-stores/sqlite) * [x] [Azure Cache for Redis (community plugin)](https://github.com/Azure/feast-azure) * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/online-stores/postgres) + * [x] [Cassandra / AstraDB (contrib plugin)](https://docs.feast.dev/reference/online-stores/cassandra) * [x] [Custom online store support](https://docs.feast.dev/how-to-guides/adding-support-for-a-new-online-store) - * [x] [Cassandra / AstraDB](https://docs.feast.dev/reference/online-stores/cassandra) - * [x] [Bigtable](https://docs.feast.dev/reference/online-stores/bigtable) * **Feature Engineering** * [x] On-demand Transformations (Alpha release. See [RFC](https://docs.google.com/document/d/1lgfIw0Drc65LpaxbUu49RCeJgMew547meSJttnUqz7c/edit#)) * [x] Streaming Transformations (Alpha release. See [RFC](https://docs.google.com/document/d/1UzEyETHUaGpn0ap4G82DHluiCj7zEbrQLkJJkKSv4e8/edit))