From e04a206ea686e87adbd0699f44b0346155aecc9b Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Sun, 7 Aug 2022 11:13:35 -0500 Subject: [PATCH] fix: Fix Feast Java inconsistency with int64 serialization vs python Signed-off-by: Danny Chiao --- .../feature_repo/application-override.yaml | 1 + .../java-demo/feature_repo/driver_repo.py | 20 +++----------- .../java-demo/feature_repo/feature_store.yaml | 1 + .../feature_repo/test_python_fetch.py | 26 +++++++++++++++++++ java/serving/README.md | 4 +-- .../retriever/EntityKeySerializerV2.java | 3 ++- sdk/python/feast/feature_store.py | 2 +- 7 files changed, 37 insertions(+), 20 deletions(-) create mode 100644 examples/java-demo/feature_repo/test_python_fetch.py diff --git a/examples/java-demo/feature_repo/application-override.yaml b/examples/java-demo/feature_repo/application-override.yaml index dbdeda4c04f..5a43d886dcd 100644 --- a/examples/java-demo/feature_repo/application-override.yaml +++ b/examples/java-demo/feature_repo/application-override.yaml @@ -10,6 +10,7 @@ feature-server: host: my-redis-master port: 6379 password: [YOUR PASSWORD] + entityKeySerializationVersion: 2 global: registry: path: gs://[YOUR BUCKET]/demo-repo/registry.db diff --git a/examples/java-demo/feature_repo/driver_repo.py b/examples/java-demo/feature_repo/driver_repo.py index 94bb6c16f50..f7dd05afff7 100644 --- a/examples/java-demo/feature_repo/driver_repo.py +++ b/examples/java-demo/feature_repo/driver_repo.py @@ -3,13 +3,11 @@ import pandas as pd from feast.data_source import RequestSource -from feast.field import Field from feast.on_demand_feature_view import on_demand_feature_view -from feast.request_feature_view import RequestFeatureView from feast.types import Float32, Float64, Int64, String from feast.field import Field -from feast import Entity, Feature, BatchFeatureView, FileSource +from feast import Entity, FileSource, FeatureView driver_hourly_stats = FileSource( path="data/driver_stats_with_string.parquet", @@ -17,10 +15,10 @@ created_timestamp_column="created", ) driver = Entity(name="driver_id", description="driver id",) -driver_hourly_stats_view = BatchFeatureView( +driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", entities=[driver], - ttl=timedelta(seconds=86400000), + ttl=timedelta(days=365), schema=[ Field(name="conv_rate", dtype=Float32), Field(name="acc_rate", dtype=Float32), @@ -42,6 +40,7 @@ ], ) + # Define an on demand feature view which can generate new features based on # existing feature views and RequestSource features @on_demand_feature_view( @@ -60,14 +59,3 @@ def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame: df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"] return df - -# Define request feature view -driver_age_request_fv = RequestFeatureView( - name="driver_age", - request_data_source=RequestSource( - name="driver_age", - schema=[ - Field(name="driver_age", dtype=Int64), - ], - ), -) diff --git a/examples/java-demo/feature_repo/feature_store.yaml b/examples/java-demo/feature_repo/feature_store.yaml index 03e7c5cc9cb..cfb0a89e8df 100644 --- a/examples/java-demo/feature_repo/feature_store.yaml +++ b/examples/java-demo/feature_repo/feature_store.yaml @@ -6,3 +6,4 @@ online_store: connection_string: localhost:6379,password=[YOUR PASSWORD] offline_store: type: file +entity_key_serialization_version: 2 \ No newline at end of file diff --git a/examples/java-demo/feature_repo/test_python_fetch.py b/examples/java-demo/feature_repo/test_python_fetch.py new file mode 100644 index 00000000000..5e2781e1508 --- /dev/null +++ b/examples/java-demo/feature_repo/test_python_fetch.py @@ -0,0 +1,26 @@ +from feast import FeatureStore + + +def run_demo(): + store = FeatureStore(repo_path=".") + + print("\n--- Online features ---") + features = store.get_online_features( + features=[ + "driver_hourly_stats:conv_rate", + ], + entity_rows=[ + { + "driver_id": 1001, + }, + { + "driver_id": 1002, + } + ], + ).to_dict() + for key, value in sorted(features.items()): + print(key, " : ", value) + + +if __name__ == "__main__": + run_demo() diff --git a/java/serving/README.md b/java/serving/README.md index 5ac7194924f..a0d87563a95 100644 --- a/java/serving/README.md +++ b/java/serving/README.md @@ -41,7 +41,7 @@ From the Feast GitHub root, run: java \ -Xms1g \ -Xmx4g \ - -jar java/serving/target/feast-serving-0.17.1-SNAPSHOT-jar-with-dependencies.jar \ + -jar java/serving/target/feast-serving-[YOUR VERSION]-jar-with-dependencies.jar \ classpath:/application.yml,file:./application-override.yaml ``` 5. Now you have a Feast Serving gRPC service running on port 6566 locally! @@ -124,7 +124,7 @@ You can debug this like any other Java executable. Swap the java command above w -Xrunjdwp:transport=dt_socket,address=5005,server=y,suspend=y \ -Xms1g \ -Xmx4g \ - -jar java/serving/target/feast-serving-0.17.1-SNAPSHOT-jar-with-dependencies.jar \ + -jar java/serving/target/feast-serving-[YOUR VERSION]-jar-with-dependencies.jar \ classpath:/application.yml,file:./application-override.yaml ``` Now you can attach e.g. a Remote debugger in IntelliJ to port 5005 to debug / make breakpoints. diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java index f99e5cbdb1e..5afc4040087 100644 --- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java +++ b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java @@ -87,14 +87,15 @@ public byte[] serialize(RedisProto.RedisKeyV2 entityKey) { break; case INT64_VAL: buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.INT64.getNumber())); - buffer.addAll(encodeInteger(Integer.BYTES)); /* This is super dumb - but in https://github.com/feast-dev/feast/blob/dcae1606f53028ce5413567fb8b66f92cfef0f8e/sdk/python/feast/infra/key_encoding_utils.py#L9 we use `struct.pack("