From c3cf7c6079a7c44d98bb8a2d0afd16785a37fd40 Mon Sep 17 00:00:00 2001
From: John Kew <john.kew@snowflake.com>
Date: Wed, 23 Oct 2024 16:55:42 -0700
Subject: [PATCH] Provide a numpy compatibility mapping to np.full_like

---
 CHANGELOG.md                                  |  1 +
 docs/source/modin/numpy.rst                   |  3 ++
 .../modin/plugin/utils/numpy_to_pandas.py     | 27 ++++++++++++-
 tests/integ/modin/test_numpy.py               | 39 +++++++++++++++++++
 4 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ce7fee72f..c3b3db51fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,6 +38,7 @@
 - Added support for timedelta inputs in `value_counts()`.
 - Added support for applying Snowpark Python function `snowflake_cortex_summarize`.
 - Added support for `DataFrame`/`Series.attrs`
+- Added numpy compatibility support for `np.full_like`
 
 #### Improvements
 
diff --git a/docs/source/modin/numpy.rst b/docs/source/modin/numpy.rst
index fa9c650aba..83d24bdd63 100644
--- a/docs/source/modin/numpy.rst
+++ b/docs/source/modin/numpy.rst
@@ -25,6 +25,9 @@ NumPy ufuncs called with Snowpark pandas arguments will ignore kwargs.
 |                             | dispatcher at all, and the normal NumPy behavior   |
 |                             | will occur.)                                       |
 +-----------------------------+----------------------------------------------------+
+| ``np.full_like``            | Mapped to pd.DataFrame(value, index=range(height), |
+|                             | columns=range(width)) or pd.Series for 1-D input   |
++-----------------------------+----------------------------------------------------+
 | ``np.may_share_memory``     | Returns False                                      |
 +-----------------------------+----------------------------------------------------+
 | ``np.add``                  | Mapped to df.__add__(df2)                          |
diff --git a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
index c751b4fe55..4fa1b58bab 100644
--- a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
+++ b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
@@ -1,7 +1,7 @@
 #
 # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
 #
-from typing import Any, Optional, Union
+from typing import Any, Hashable, Optional, Union
 
 import modin.pandas as pd
 from modin.pandas.base import BasePandasDataset
@@ -112,6 +112,30 @@ def may_share_memory_mapper(a: Any, b: Any, max_work: Optional[int] = None) -> b
     return False
 
 
+def full_like_mapper(
+    a: Union[pd.DataFrame, pd.Series],
+    fill_value: Hashable,
+    dtype: Optional[Any] = None,
+    order: Optional[str] = "K",
+    subok: Optional[bool] = True,
+    shape: Optional[Union[int, tuple[Any, ...]]] = None,
+) -> Union[pd.DataFrame, pd.Series]:
+    """Build a constant Series (1-D) or DataFrame (2-D) for np.full_like."""
+    # dtype, order and subok are only supported at their numpy default values.
+    if not subok or order != "K" or dtype is not None:
+        return NotImplemented
+    # Test against None: a truthiness check would discard an explicit 0 or ().
+    result_shape = a.shape if shape is None else shape
+    if isinstance(result_shape, int):
+        result_shape = (result_shape,)  # numpy accepts a bare int for 1-D
+    if len(result_shape) == 2:
+        height, width = result_shape  # type: ignore
+        return pd.DataFrame(fill_value, index=range(height), columns=range(width))
+    if len(result_shape) == 1:
+        return pd.Series(fill_value, index=range(result_shape[0]))
+    return NotImplemented
+
+
 # We also need to convert everything to booleans, since numpy will
 # do this implicitly on logical operators and pandas does not.
 def map_to_bools(inputs: Any) -> Any:
@@ -125,6 +149,7 @@ def map_to_bools(inputs: Any) -> Any:
 numpy_to_pandas_func_map = {
     "where": where_mapper,
     "may_share_memory": may_share_memory_mapper,
+    "full_like": full_like_mapper,  # np.full_like -> constant Series/DataFrame
 }
 
 # Map that associates a numpy universal function name that operates on
diff --git a/tests/integ/modin/test_numpy.py b/tests/integ/modin/test_numpy.py
index 6719055ab9..b437266561 100644
--- a/tests/integ/modin/test_numpy.py
+++ b/tests/integ/modin/test_numpy.py
@@ -57,6 +57,45 @@ def test_np_may_share_memory():
         assert not np.may_share_memory(snow_df_A, native_df_A)
 
 
+def test_full_like():
+    data = {
+        "A": [0, 1, 2, 0, 1, 2, 0, 1, 2],
+        "B": [True, False, True, True, False, True, False, False, False],
+        "C": ["a", "b", "c", "d", "a", "b", "c", "d", "e"],
+    }
+    snow_df = pd.DataFrame(data)
+    pandas_df = native_pd.DataFrame(data)
+    # Each case compares the Snowpark pandas result with the native pandas one.
+    with SqlCounter(query_count=2):
+        snow_result = np.full_like(snow_df, 1234)
+        pandas_result = np.full_like(pandas_df, 1234)
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+    # Explicit shape= argument instead of the input's own shape.
+    with SqlCounter(query_count=1):
+        snow_result = np.full_like(snow_df, 1234, shape=(5, 3))
+        pandas_result = np.full_like(pandas_df, 1234, shape=(5, 3))
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+    # A single selected column as the input.
+    with SqlCounter(query_count=2):
+        snow_result = np.full_like(snow_df["A"], 1234)
+        pandas_result = np.full_like(pandas_df["A"], 1234)
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+    # String fill value. NOTE(review): 1 query vs 2 in the first case; confirm.
+    with SqlCounter(query_count=1):
+        snow_result = np.full_like(snow_df, "numpy is the best")
+        pandas_result = np.full_like(pandas_df, "numpy is the best")
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+    # Non-default subok, order and dtype are each expected to raise TypeError.
+    with pytest.raises(TypeError):
+        np.full_like(snow_df, 1234, subok=False)
+
+    with pytest.raises(TypeError):
+        np.full_like(snow_df, 1234, order="D")
+
+    with pytest.raises(TypeError):
+        np.full_like(snow_df, 1234, dtype=int)
+
+
 def test_logical_operators():
     data = {
         "A": [0, 1, 2, 0, 1, 2, 0, 1, 2],