From c8dc24518e40ac337e3b85205a4f5a807d50c2e9 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Sun, 29 Mar 2026 19:18:51 -0400
Subject: [PATCH 1/3] Add missing conditional functions: greatest, least, nvl2,
 ifnull (#1449)

Expose greatest, least, and nvl2 from upstream DataFusion in the Python
bindings, and add ifnull as a Python-level alias for the existing nvl.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 crates/core/src/functions.rs   | 10 +++++
 python/datafusion/functions.py | 79 ++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/crates/core/src/functions.rs b/crates/core/src/functions.rs
index c32134054..785352741 100644
--- a/crates/core/src/functions.rs
+++ b/crates/core/src/functions.rs
@@ -494,6 +494,8 @@ expr_fn!(length, string);
 expr_fn!(char_length, string);
 expr_fn!(chr, arg, "Returns the character with the given code.");
 expr_fn_vec!(coalesce);
+expr_fn_vec!(greatest);
+expr_fn_vec!(least);
 expr_fn!(cos, num);
 expr_fn!(cosh, num);
 expr_fn!(cot, num);
@@ -543,6 +545,11 @@ expr_fn!(
     x y,
     "Returns x if x is not NULL otherwise returns y."
 );
+expr_fn!(
+    nvl2,
+    x y z,
+    "Returns y if x is not NULL; otherwise returns z."
+);
 expr_fn!(nullif, arg_1 arg_2);
 expr_fn!(
     octet_length,
@@ -981,6 +988,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(floor))?;
     m.add_wrapped(wrap_pyfunction!(from_unixtime))?;
     m.add_wrapped(wrap_pyfunction!(gcd))?;
+    m.add_wrapped(wrap_pyfunction!(greatest))?;
     // m.add_wrapped(wrap_pyfunction!(grouping))?;
     m.add_wrapped(wrap_pyfunction!(in_list))?;
     m.add_wrapped(wrap_pyfunction!(initcap))?;
@@ -988,6 +996,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(iszero))?;
     m.add_wrapped(wrap_pyfunction!(levenshtein))?;
     m.add_wrapped(wrap_pyfunction!(lcm))?;
+    m.add_wrapped(wrap_pyfunction!(least))?;
     m.add_wrapped(wrap_pyfunction!(left))?;
     m.add_wrapped(wrap_pyfunction!(length))?;
     m.add_wrapped(wrap_pyfunction!(ln))?;
@@ -1005,6 +1014,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(named_struct))?;
     m.add_wrapped(wrap_pyfunction!(nanvl))?;
     m.add_wrapped(wrap_pyfunction!(nvl))?;
+    m.add_wrapped(wrap_pyfunction!(nvl2))?;
     m.add_wrapped(wrap_pyfunction!(now))?;
     m.add_wrapped(wrap_pyfunction!(nullif))?;
     m.add_wrapped(wrap_pyfunction!(octet_length))?;
diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index f062cbfce..d93e46292 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -149,6 +149,8 @@
     "floor",
     "from_unixtime",
     "gcd",
+    "greatest",
+    "ifnull",
     "in_list",
     "initcap",
     "isnan",
@@ -157,6 +159,7 @@
     "last_value",
     "lcm",
     "lead",
+    "least",
     "left",
     "length",
     "levenshtein",
@@ -212,6 +215,7 @@
     "ntile",
     "nullif",
     "nvl",
+    "nvl2",
     "octet_length",
     "order_by",
     "overlay",
@@ -1027,6 +1031,44 @@ def gcd(x: Expr, y: Expr) -> Expr:
     return Expr(f.gcd(x.expr, y.expr))
 
 
+def greatest(*args: Expr) -> Expr:
+    """Returns the greatest value from a list of expressions.
+
+    NULL arguments are ignored; returns NULL only if all expressions are NULL.
+
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 3], "b": [2, 1]})
+        >>> result = df.select(
+        ...     dfn.functions.greatest(dfn.col("a"), dfn.col("b")).alias("greatest"))
+        >>> result.collect_column("greatest")[0].as_py()
+        2
+        >>> result.collect_column("greatest")[1].as_py()
+        3
+    """
+    args = [arg.expr for arg in args]
+    return Expr(f.greatest(*args))
+
+
+def ifnull(x: Expr, y: Expr) -> Expr:
+    """Returns ``x`` if ``x`` is not NULL. Otherwise returns ``y``.
+
+    This is an alias for :py:func:`nvl`.
+
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [None, 1], "b": [0, 0]})
+        >>> result = df.select(
+        ...     dfn.functions.ifnull(dfn.col("a"), dfn.col("b")).alias("ifnull")
+        ... )
+        >>> result.collect_column("ifnull")[0].as_py()
+        0
+        >>> result.collect_column("ifnull")[1].as_py()
+        1
+    """
+    return nvl(x, y)
+
+
 def initcap(string: Expr) -> Expr:
     """Set the initial letter of each word to capital.
 
@@ -1080,6 +1122,25 @@ def lcm(x: Expr, y: Expr) -> Expr:
     return Expr(f.lcm(x.expr, y.expr))
 
 
+def least(*args: Expr) -> Expr:
+    """Returns the least value from a list of expressions.
+
+    NULL arguments are ignored; returns NULL only if all expressions are NULL.
+
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 3], "b": [2, 1]})
+        >>> result = df.select(
+        ...     dfn.functions.least(dfn.col("a"), dfn.col("b")).alias("least"))
+        >>> result.collect_column("least")[0].as_py()
+        1
+        >>> result.collect_column("least")[1].as_py()
+        1
+    """
+    args = [arg.expr for arg in args]
+    return Expr(f.least(*args))
+
+
 def left(string: Expr, n: Expr) -> Expr:
     """Returns the first ``n`` characters in the ``string``.
 
@@ -1264,6 +1325,24 @@ def nvl(x: Expr, y: Expr) -> Expr:
     return Expr(f.nvl(x.expr, y.expr))
 
 
+def nvl2(x: Expr, y: Expr, z: Expr) -> Expr:
+    """Returns ``y`` if ``x`` is not NULL. Otherwise returns ``z``.
+
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [None, 1], "b": [10, 20], "c": [30, 40]})
+        >>> result = df.select(
+        ...     dfn.functions.nvl2(
+        ...         dfn.col("a"), dfn.col("b"), dfn.col("c")).alias("nvl2")
+        ... )
+        >>> result.collect_column("nvl2")[0].as_py()
+        30
+        >>> result.collect_column("nvl2")[1].as_py()
+        20
+    """
+    return Expr(f.nvl2(x.expr, y.expr, z.expr))
+
+
 def octet_length(arg: Expr) -> Expr:
     """Returns the number of bytes of a string.
 

From 0d148319f33c0948a0df2386c904a86d122c6bad Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Sun, 29 Mar 2026 19:23:08 -0400
Subject: [PATCH 2/3] Add unit tests for greatest, least, nvl2, and ifnull
 functions

Tests cover multiple data types (integers, strings), null handling
(all-null, partial-null), multiple arguments, and ifnull/nvl equivalence.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 python/tests/test_functions.py | 162 +++++++++++++++++++++++++++++++++
 1 file changed, 162 insertions(+)

diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index 37d349c58..e10a1e94f 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -1435,3 +1435,165 @@ def test_coalesce(df):
     assert result.column(0) == pa.array(
         ["Hello", "fallback", "!"], type=pa.string_view()
     )
+
+
+def test_greatest(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test greatest with two columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b")).alias("greatest_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 5, None], type=pa.int64())
+
+    # Test greatest with three columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b"), column("c")).alias("greatest_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 8, None], type=pa.int64())
+
+    # Test greatest with nulls mixed in (partial nulls)
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.greatest(column("x"), column("y")).alias("g")).collect()[
+        0
+    ]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test greatest with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(
+        f.greatest(column("s1"), column("s2")).alias("g")
+    ).collect()[0]
+    assert result.column(0).to_pylist() == ["banana", "cherry"]
+
+
+def test_least(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test least with two columns
+    result = df_test.select(
+        f.least(column("a"), column("b")).alias("least_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([1, 2, None], type=pa.int64())
+
+    # Test least with three columns
+    result = df_test.select(
+        f.least(column("a"), column("b"), column("c")).alias("least_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([1, 2, None], type=pa.int64())
+
+    # Test least with partial nulls
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.least(column("x"), column("y")).alias("l")).collect()[0]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test least with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(f.least(column("s1"), column("s2")).alias("l")).collect()[
+        0
+    ]
+    assert result.column(0).to_pylist() == ["apple", "apricot"]
+
+
+def test_nvl2(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 1, None, 4]),
+            pa.array([10, 20, 30, 40]),
+            pa.array([100, 200, 300, 400]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # nvl2 returns b when a is not null, c when a is null
+    result = df_test.select(
+        f.nvl2(column("a"), column("b"), column("c")).alias("result")
+    ).collect()[0]
+    assert result.column(0) == pa.array([100, 20, 300, 40], type=pa.int64())
+
+    # Test with string columns
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["x", None]),
+            pa.array(["not_null", "not_null"]),
+            pa.array(["is_null", "is_null"]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(
+        f.nvl2(column("a"), column("b"), column("c")).alias("result")
+    ).collect()[0]
+    assert result.column(0).to_pylist() == ["not_null", "is_null"]
+
+
+def test_ifnull(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 1, None, 4]),
+            pa.array([10, 20, 30, 40]),
+        ],
+        names=["a", "b"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # ifnull returns a when a is not null, b when a is null (same as nvl)
+    result = df_test.select(
+        f.ifnull(column("a"), column("b")).alias("result")
+    ).collect()[0]
+    assert result.column(0) == pa.array([10, 1, 30, 4], type=pa.int64())
+
+    # Verify ifnull matches nvl behavior
+    result_nvl = df_test.select(
+        f.nvl(column("a"), column("b")).alias("nvl_result")
+    ).collect()[0]
+    assert result.column(0) == result_nvl.column(0)

From c7733492920b9dedb7d838cebbb1340b6be1ded3 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Sun, 29 Mar 2026 19:30:06 -0400
Subject: [PATCH 3/3] Use standard alias docstring pattern for ifnull

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 python/datafusion/functions.py | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index d93e46292..73612fb0c 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -1053,18 +1053,8 @@ def greatest(*args: Expr) -> Expr:
 def ifnull(x: Expr, y: Expr) -> Expr:
     """Returns ``x`` if ``x`` is not NULL. Otherwise returns ``y``.
 
-    This is an alias for :py:func:`nvl`.
-
-    Examples:
-        >>> ctx = dfn.SessionContext()
-        >>> df = ctx.from_pydict({"a": [None, 1], "b": [0, 0]})
-        >>> result = df.select(
-        ...     dfn.functions.ifnull(dfn.col("a"), dfn.col("b")).alias("ifnull")
-        ... )
-        >>> result.collect_column("ifnull")[0].as_py()
-        0
-        >>> result.collect_column("ifnull")[1].as_py()
-        1
+    See Also:
+        This is an alias for :py:func:`nvl`.
     """
     return nvl(x, y)