From d6b21bc54c523b0bc9f9bca30e61585043b60400 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Sat, 13 Apr 2024 23:13:06 +0000 Subject: [PATCH 1/7] feat: creates bigquery namespace and adds bigquery.array_length function --- bigframes/bigquery/__init__.py | 21 ++++++++++ bigframes/bigquery/array.py | 49 +++++++++++++++++++++++ tests/system/small/bigquery/__init__.py | 13 ++++++ tests/system/small/bigquery/test_array.py | 30 ++++++++++++++ 4 files changed, 113 insertions(+) create mode 100644 bigframes/bigquery/__init__.py create mode 100644 bigframes/bigquery/array.py create mode 100644 tests/system/small/bigquery/__init__.py create mode 100644 tests/system/small/bigquery/test_array.py diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py new file mode 100644 index 0000000000..688f60ca0d --- /dev/null +++ b/bigframes/bigquery/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes.bigquery.array as array + +array_length = array.array_length + +__all___ = [ + "array_length", +] diff --git a/bigframes/bigquery/array.py b/bigframes/bigquery/array.py new file mode 100644 index 0000000000..2e49842dea --- /dev/null +++ b/bigframes/bigquery/array.py @@ -0,0 +1,49 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module defines BigQuery built-in array functions: +https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions.""" + +from __future__ import annotations + +import typing + +import bigframes.operations as ops + +if typing.TYPE_CHECKING: + import bigframes.series as series + + +def array_length(series: series.Series) -> series.Series: + """Compute the length of each array element in the Series. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([[1, 2, 8, 3], [], [3, 4]]) + >>> bbq.array_length(s) + 0 4 + 1 0 + 2 2 + dtype: Int64 + + Returns: + bigframes.series.Series: A Series of integer values indicating + the length of each element in the Series. + + """ + return series._apply_unary_op(ops.len_op) diff --git a/tests/system/small/bigquery/__init__.py b/tests/system/small/bigquery/__init__.py new file mode 100644 index 0000000000..1dc90d1848 --- /dev/null +++ b/tests/system/small/bigquery/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/system/small/bigquery/test_array.py b/tests/system/small/bigquery/test_array.py new file mode 100644 index 0000000000..7463c4c1b9 --- /dev/null +++ b/tests/system/small/bigquery/test_array.py @@ -0,0 +1,30 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pandas as pd + +import bigframes.bigquery as bbq +import bigframes.pandas as bpd + + +def test_array_length(): + series = bpd.Series([["A", "AA", "AAA"], ["BB", "B"], np.nan, [], ["C"]]) + expected = pd.Series([3, 2, 0, 0, 1]) + pd.testing.assert_series_equal( + bbq.array_length(series).to_pandas(), + expected, + check_dtype=False, + check_index_type=False, + ) From d0cf58584e307ab7fae14d9350914d43eb3a0cca Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 22 Apr 2024 20:57:19 +0000 Subject: [PATCH 2/7] add docs --- bigframes/bigquery/__init__.py | 41 ++++++++++++++--- bigframes/bigquery/array.py | 49 --------------------- docs/reference/bigframes.bigquery/index.rst | 9 ++++ docs/reference/bigframes/index.rst | 1 + docs/reference/index.rst | 1 + docs/templates/toc.yml | 2 + tests/system/small/bigquery/__init__.py | 2 +- tests/system/small/bigquery/test_array.py | 2 +- 8 files changed, 51 insertions(+), 56 deletions(-) delete mode 100644 bigframes/bigquery/array.py create mode 100644 docs/reference/bigframes.bigquery/index.rst diff 
--git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 688f60ca0d..93d5b1155b 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -12,10 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. -import bigframes.bigquery.array as array -array_length = array.array_length +"""This module integrates BigQuery built-in functions for use with DataFrame objects, +such as array functions: +https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions. """ -__all___ = [ - "array_length", -] + +from __future__ import annotations + +import typing + +import bigframes.operations as ops + +if typing.TYPE_CHECKING: + import bigframes.series as series + + +def array_length(series: series.Series) -> series.Series: + """Compute the length of each array element in the Series. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([[1, 2, 8, 3], [], [3, 4]]) + >>> bbq.array_length(s) + 0 4 + 1 0 + 2 2 + dtype: Int64 + + Returns: + bigframes.series.Series: A Series of integer values indicating + the length of each element in the Series. + + """ + return series._apply_unary_op(ops.len_op) diff --git a/bigframes/bigquery/array.py b/bigframes/bigquery/array.py deleted file mode 100644 index 2e49842dea..0000000000 --- a/bigframes/bigquery/array.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""This module defines BigQuery built-in array functions: -https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions.""" - -from __future__ import annotations - -import typing - -import bigframes.operations as ops - -if typing.TYPE_CHECKING: - import bigframes.series as series - - -def array_length(series: series.Series) -> series.Series: - """Compute the length of each array element in the Series. - - **Examples:** - - >>> import bigframes.pandas as bpd - >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None - - >>> s = bpd.Series([[1, 2, 8, 3], [], [3, 4]]) - >>> bbq.array_length(s) - 0 4 - 1 0 - 2 2 - dtype: Int64 - - Returns: - bigframes.series.Series: A Series of integer values indicating - the length of each element in the Series. - - """ - return series._apply_unary_op(ops.len_op) diff --git a/docs/reference/bigframes.bigquery/index.rst b/docs/reference/bigframes.bigquery/index.rst new file mode 100644 index 0000000000..03e9bb48a4 --- /dev/null +++ b/docs/reference/bigframes.bigquery/index.rst @@ -0,0 +1,9 @@ + +=========================== +BigQuery Built-in Functions +=========================== + +.. automodule:: bigframes.bigquery + :members: + :inherited-members: + :undoc-members: diff --git a/docs/reference/bigframes/index.rst b/docs/reference/bigframes/index.rst index 76d64444fa..d26db18c96 100644 --- a/docs/reference/bigframes/index.rst +++ b/docs/reference/bigframes/index.rst @@ -1,4 +1,5 @@ +============ Core objects ============ diff --git a/docs/reference/index.rst b/docs/reference/index.rst index c790831db1..387e9b5ced 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -10,3 +10,4 @@ packages. 
bigframes/index bigframes.pandas/index bigframes.ml/index + bigframes.bigquery/index diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index 4573296ec3..f3365aef74 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -189,5 +189,7 @@ uid: bigframes.ml.remote.VertexAIModel name: remote name: bigframes.ml + - name: Bigquery Built-in Functions + uid: bigframes.bigquery name: BigQuery DataFrames status: beta diff --git a/tests/system/small/bigquery/__init__.py b/tests/system/small/bigquery/__init__.py index 1dc90d1848..6d5e14bcf4 100644 --- a/tests/system/small/bigquery/__init__.py +++ b/tests/system/small/bigquery/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/system/small/bigquery/test_array.py b/tests/system/small/bigquery/test_array.py index 7463c4c1b9..c0f9b26ef6 100644 --- a/tests/system/small/bigquery/test_array.py +++ b/tests/system/small/bigquery/test_array.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 971ef0e68d37484a1bb78d131d7135cbf39e1a2e Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 22 Apr 2024 20:59:10 +0000 Subject: [PATCH 3/7] minor fix --- bigframes/bigquery/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 93d5b1155b..efe71953ba 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -14,7 +14,7 @@ """This module integrates BigQuery built-in functions for use with DataFrame objects, -such as array functions: +such as array functions: https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions. 
""" From 65b2740665d8b7344fdf8ee08f70125a14230fd2 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 22 Apr 2024 23:14:19 +0000 Subject: [PATCH 4/7] fixing docs --- docs/templates/toc.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index f3365aef74..4bcba5d65e 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -189,7 +189,9 @@ uid: bigframes.ml.remote.VertexAIModel name: remote name: bigframes.ml - - name: Bigquery Built-in Functions - uid: bigframes.bigquery + - items: + - name: Bigquery Built-in Functions + uid: bigframes.bigquery + name: bigframes.bigquery name: BigQuery DataFrames status: beta From 5a2c2e8776e1954aa9af29e1e2593d29e48e505f Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 29 Apr 2024 21:32:52 +0000 Subject: [PATCH 5/7] add more doc tests --- bigframes/bigquery/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index efe71953ba..197e0a83b5 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -44,6 +44,14 @@ def array_length(series: series.Series) -> series.Series: 2 2 dtype: Int64 + You can also apply this function directly to Series. + + >>> s.apply(bbq.array_length, by_row=False) + 0 4 + 1 0 + 2 2 + dtype: Int64 + Returns: bigframes.series.Series: A Series of integer values indicating the length of each element in the Series. 
From 833e62f9bc5354bac5ebcdc8a5351f3d71f05d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 30 Apr 2024 14:20:31 -0500 Subject: [PATCH 6/7] sentence-case --- docs/templates/toc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index 4bcba5d65e..80ccc01fac 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -190,7 +190,7 @@ name: remote name: bigframes.ml - items: - - name: Bigquery Built-in Functions + - name: BigQuery built-in functions uid: bigframes.bigquery name: bigframes.bigquery name: BigQuery DataFrames From c78b4ce6a7dd8d159d5405096f01188c30f935ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 30 Apr 2024 14:22:18 -0500 Subject: [PATCH 7/7] TODO for null arrays --- tests/system/small/bigquery/test_array.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/system/small/bigquery/test_array.py b/tests/system/small/bigquery/test_array.py index c0f9b26ef6..a91669cd88 100644 --- a/tests/system/small/bigquery/test_array.py +++ b/tests/system/small/bigquery/test_array.py @@ -21,6 +21,8 @@ def test_array_length(): series = bpd.Series([["A", "AA", "AAA"], ["BB", "B"], np.nan, [], ["C"]]) + # TODO(b/336880368): Allow for NULL values to be input for ARRAY columns. + # Once we actually store NULL values, this will be NULL where the input is NULL. expected = pd.Series([3, 2, 0, 0, 1]) pd.testing.assert_series_equal( bbq.array_length(series).to_pandas(),