Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,34 @@ def nsmallest(
column_ids = self._sql_names(columns)
return DataFrame(block_ops.nsmallest(self._block, n, column_ids, keep=keep))

def insert(
    self,
    loc: int,
    column: blocks.Label,
    value: SingleItemValue,
    allow_duplicates: bool = False,
) -> None:
    """Insert a column into this DataFrame at position ``loc``, in place.

    Args:
        loc: Zero-based insertion position. Must satisfy
            ``0 <= loc <= len(self.columns)``.
        column: Label of the inserted column.
        value: Content of the inserted column; forwarded unchanged to
            ``_assign_single_item``.
        allow_duplicates: When True, ``column`` may repeat an existing label.

    Raises:
        IndexError: If ``loc`` is negative or greater than the number of
            columns.
        ValueError: If ``column`` already exists and ``allow_duplicates``
            is False.
    """
    column_count = len(self.columns)
    # Negative positions must be rejected explicitly: ``list.insert`` below
    # would otherwise interpret them relative to the end and silently put
    # the column in the wrong place.
    if not 0 <= loc <= column_count:
        raise IndexError(
            f"Column index {loc} is out of bounds with {column_count} total columns."
        )
    if (column in self.columns) and not allow_duplicates:
        raise ValueError(f"cannot insert {column}, already exists")

    # Assign the value under a generated temporary label first (presumably so
    # that an existing column with the same label is not replaced when
    # duplicates are allowed); it is renamed to ``column`` at the end.
    temp_column = bigframes.core.guid.generate_guid(prefix=str(column))
    df = self._assign_single_item(temp_column, value)

    block = df._get_block()
    value_columns = typing.cast(List, block.value_columns)
    # Treat the last value column as the newly assigned one and move it to
    # the requested position.
    value_columns, new_column = value_columns[:-1], value_columns[-1]
    value_columns.insert(loc, new_column)

    block = block.select_columns(value_columns)
    block = block.rename(columns={temp_column: column})

    self._set_block(block)

def drop(
self,
labels: typing.Any = None,
Expand Down
38 changes: 38 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,44 @@ def test_get_columns_default(scalars_dfs):
assert result == "default_val"


@pytest.mark.parametrize(
    ("loc", "column", "value", "allow_duplicates"),
    [
        (0, 666, 2, False),
        (5, "float64_col", 2.2, True),
        (13, "rowindex_2", [8, 7, 6, 5, 4, 3, 2, 1, 0], True),
    ],
)
def test_insert(scalars_dfs, loc, column, value, allow_duplicates):
    """Valid inserts give the same frame in BigQuery DataFrames and pandas."""
    scalars_df, scalars_pandas_df = scalars_dfs
    # insert works inplace, so will influence other tests.
    # make a copy to avoid inplace changes.
    bf_df = scalars_df.copy()
    pd_df = scalars_pandas_df.copy()
    bf_df.insert(loc, column, value, allow_duplicates)
    pd_df.insert(loc, column, value, allow_duplicates)

    pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df, check_dtype=False)


@pytest.mark.parametrize(
    ("loc", "column", "value", "expected_error"),
    [
        # Position expected to be out of bounds for the scalars fixture.
        (14, "test", 2, IndexError),
        # Label expected to already exist, with duplicates disallowed.
        (12, "int64_col", 2, ValueError),
    ],
)
def test_insert_invalid_args_raise(scalars_dfs, loc, column, value, expected_error):
    """Invalid inserts raise the same exception type in both libraries.

    ``pytest.raises`` is used instead of a non-strict ``xfail`` mark so that
    the test fails if no exception is raised, and so that the pandas call is
    checked too rather than being skipped once the first call raises.
    """
    scalars_df, scalars_pandas_df = scalars_dfs
    # insert works inplace; operate on copies so other tests are unaffected.
    bf_df = scalars_df.copy()
    pd_df = scalars_pandas_df.copy()

    with pytest.raises(expected_error):
        bf_df.insert(loc, column, value, False)
    with pytest.raises(expected_error):
        pd_df.insert(loc, column, value, False)


def test_drop_column(scalars_dfs):
scalars_df, scalars_pandas_df = scalars_dfs
col_name = "int64_col"
Expand Down
45 changes: 45 additions & 0 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,51 @@ def reindex_like(self, other):
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def insert(self, loc, column, value, allow_duplicates=False):
    """Insert column into DataFrame at specified location.

    The DataFrame is modified in place; as the examples below show, the
    call itself produces no output and the change is visible on ``df``.

    Raises a ValueError if `column` is already contained in the DataFrame,
    unless `allow_duplicates` is set to True.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})

    Insert a new column named 'col3' between 'col1' and 'col2' with all entries set to 5.

        >>> df.insert(1, 'col3', 5)
        >>> df
           col1  col3  col2
        0     1     5     3
        1     2     5     4
        <BLANKLINE>
        [2 rows x 3 columns]

    Insert another column named 'col2' at the beginning of the DataFrame with values [5, 6]

        >>> df.insert(0, 'col2', [5, 6], allow_duplicates=True)
        >>> df
           col2  col1  col3  col2
        0     5     1     5     3
        1     6     2     5     4
        <BLANKLINE>
        [2 rows x 4 columns]

    Args:
        loc (int):
            Insertion index. Must satisfy 0 <= loc <= len(columns).
        column (str, number, or hashable object):
            Label of the inserted column.
        value (Scalar, Series, or array-like):
            Content of the inserted column.
        allow_duplicates (bool, default False):
            Allow duplicate column labels to be created.

    Raises:
        IndexError:
            If ``loc`` is greater than the number of columns.
        ValueError:
            If ``column`` already exists and ``allow_duplicates`` is False.
    """
    # Abstract in the vendored base class: this stub always raises, and the
    # docstring above is the documentation for the concrete implementation.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def drop(
self, labels=None, *, axis=0, index=None, columns=None, level=None
) -> DataFrame | None:
Expand Down