From 9f12f9e7136ab9a73729dbdf23c9a2a4ab239dfe Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Wed, 10 Apr 2024 18:21:11 +0000 Subject: [PATCH 1/3] fix: loc setitem dtype issue. --- bigframes/core/indexers.py | 2 +- tests/system/small/test_dataframe.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index da6f3f3740..af09fce22d 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -192,7 +192,7 @@ def __setitem__( and isinstance(key[0], bigframes.series.Series) and key[0].dtype == "boolean" ) and pd.api.types.is_scalar(value): - new_column = key[0].map({True: value, False: None}) + new_column = key[0].map({True: value, False: pd.NA}) try: original_column = self._dataframe[key[1]] except KeyError: diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index ba205078ed..e70764fcc0 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2918,15 +2918,23 @@ def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): ) -def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs): +@pytest.mark.parametrize( + ("col", "value"), + [ + ("string_col", "hello"), + ("int64_col", 3), + ("float64_col", 3.5), + ], +) +def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs, col, value): if pd.__version__.startswith("1."): pytest.skip("this loc overload not supported in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs bf_df = scalars_df.copy() pd_df = scalars_pandas_df.copy() - bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = "hello" - pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = "hello" + bf_df.loc[bf_df["int64_too"] == 1, col] = value + pd_df.loc[pd_df["int64_too"] == 1, col] = value pd.testing.assert_frame_equal( bf_df.to_pandas(), From 57e0fb64126f0813bbb32c89dac5ec3d84317443 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Wed, 10 Apr 2024 19:03:24 +0000 
Subject: [PATCH 2/3] Update NaN selection --- bigframes/core/indexers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index af09fce22d..bc03bd1df0 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -192,7 +192,15 @@ def __setitem__( and isinstance(key[0], bigframes.series.Series) and key[0].dtype == "boolean" ) and pd.api.types.is_scalar(value): - new_column = key[0].map({True: value, False: pd.NA}) + # For an integer scalar, if the value is set to a new column, the dtype defaults to float. + # But if the value is set to an existing Int64 column, the dtype stays integer. + # So we need to use a different NA value in each case to match this behavior. + new_column = key[0].map( + { + True: value, + False: pd.NA if key[1] in self._dataframe.columns else None, + } + ) try: original_column = self._dataframe[key[1]] except KeyError: From 68c15525faed6b10d7e29d22c19790e33d9d4ae0 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Wed, 10 Apr 2024 20:29:32 +0000 Subject: [PATCH 3/3] Update code example --- third_party/bigframes_vendored/pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 61bc39bb12..baa9534a0e 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -662,9 +662,9 @@ def copy(self): >>> df.loc[df["b"] == 2, "b"] = 22 >>> df - a b - 0 1 22.0 - 1 3 4.0 + a b + 0 1 22 + 1 3 4 [2 rows x 2 columns] >>> df_copy