Provide a numpy compatibility mapping to np.full_like

snowflakedb · Oct 23, 2024 · c3cf7c6 · c3cf7c6
1 parent cd8f160
commit c3cf7c6
Show file tree

Hide file tree

Showing 4 changed files with 69 additions and 1 deletion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -38,6 +38,7 @@
 - Added support for timedelta inputs in `value_counts()`.
 - Added support for applying Snowpark Python function `snowflake_cortex_summarize`.
 - Added support for `DataFrame`/`Series.attrs`
+- Added numpy compatibility support for `np.full_like`
 
 #### Improvements
 

diff --git a/docs/source/modin/numpy.rst b/docs/source/modin/numpy.rst
@@ -25,6 +25,9 @@ NumPy ufuncs called with Snowpark pandas arguments will ignore kwargs.
 |                             | dispatcher at all, and the normal NumPy behavior   |
 |                             | will occur.)                                       |
 +-----------------------------+----------------------------------------------------+
+| ``np.full_like``            | Mapped to pd.DataFrame(value, index=range(height), |
+|                             |                        columns=range(width))       |
++-----------------------------+----------------------------------------------------+
 | ``np.may_share_memory``     | Returns False                                      |
 +-----------------------------+----------------------------------------------------+
 | ``np.add``                  | Mapped to df.__add__(df2)                          |

diff --git a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
@@ -1,7 +1,7 @@
 #
 # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
 #
-from typing import Any, Optional, Union
+from typing import Any, Hashable, Optional, Union
 
 import modin.pandas as pd
 from modin.pandas.base import BasePandasDataset
@@ -112,6 +112,30 @@ def may_share_memory_mapper(a: Any, b: Any, max_work: Optional[int] = None) -> b
     return False
 
 
+def full_like_mapper(
+    a: Union[pd.DataFrame, pd.Series],
+    fill_value: Hashable,
+    dtype: Optional[Any] = None,
+    order: Optional[str] = "K",
+    subok: Optional[bool] = True,
+    shape: Optional[Union[int, tuple[int, ...]]] = None,
+) -> Union[pd.DataFrame, pd.Series]:
+    """
+    Map ``np.full_like`` onto a DataFrame or Series filled with ``fill_value``.
+
+    Parameters
+    ----------
+    a : DataFrame or Series whose ``shape`` is used when ``shape`` is not given.
+    fill_value : value placed in every cell of the result.
+    dtype, order, subok : only the defaults (``None``, ``"K"``, ``True``) are
+        supported; any other value makes the mapper return ``NotImplemented``.
+    shape : optional override of the result shape, given either as a single
+        integer (1-D result) or as a sequence of one or two integers.
+
+    Returns
+    -------
+    A DataFrame (2-D shape) or a Series (1-D shape) built on a ``range`` index
+    (and ``range`` columns for a DataFrame), or ``NotImplemented`` when the
+    arguments or the requested dimensionality are not supported.
+    """
+    # Anything other than numpy's default options is left unsupported.
+    if not subok or order != "K" or dtype is not None:
+        return NotImplemented
+
+    # Compare against None explicitly: an explicit empty shape ``()`` must be
+    # reported as unsupported below instead of silently falling back to
+    # ``a.shape``, and array-like shapes must not be truth-tested.
+    if shape is None:
+        result_shape = a.shape
+    elif hasattr(shape, "__len__"):
+        result_shape = tuple(shape)
+    else:
+        # numpy accepts a bare integer as shorthand for a 1-D shape.
+        result_shape = (shape,)
+
+    if len(result_shape) == 2:
+        height, width = result_shape  # type: ignore
+        return pd.DataFrame(fill_value, index=range(height), columns=range(width))
+    if len(result_shape) == 1:
+        return pd.Series(fill_value, index=range(result_shape[0]))
+    return NotImplemented
+
+
 # We also need to convert everything to booleans, since numpy will
 # do this implicitly on logical operators and pandas does not.
 def map_to_bools(inputs: Any) -> Any:
@@ -125,6 +149,7 @@ def map_to_bools(inputs: Any) -> Any:
 numpy_to_pandas_func_map = {
     "where": where_mapper,
     "may_share_memory": may_share_memory_mapper,
+    "full_like": full_like_mapper,
 }
 
 # Map that associates a numpy universal function name that operates on

diff --git a/tests/integ/modin/test_numpy.py b/tests/integ/modin/test_numpy.py
@@ -57,6 +57,45 @@ def test_np_may_share_memory():
         assert not np.may_share_memory(snow_df_A, native_df_A)
 
 
+def test_full_like():
+    """
+    Compare ``np.full_like`` on Snowpark pandas objects with native pandas.
+
+    Each supported case runs inside a ``SqlCounter`` with its expected query
+    count; unsupported ``subok``/``order``/``dtype`` values must raise
+    ``TypeError``.
+    """
+    data = {
+        "A": [0, 1, 2, 0, 1, 2, 0, 1, 2],
+        "B": [True, False, True, True, False, True, False, False, False],
+        "C": ["a", "b", "c", "d", "a", "b", "c", "d", "e"],
+    }
+    snow_df = pd.DataFrame(data)
+    pandas_df = native_pd.DataFrame(data)
+
+    # (expected query count, input selector, fill value, extra kwargs).
+    # The selector is applied inside the SqlCounter block so that any work
+    # done while picking the input is counted exactly where it was before.
+    supported_cases = [
+        (2, lambda frame: frame, 1234, {}),
+        (1, lambda frame: frame, 1234, {"shape": (5, 3)}),
+        (2, lambda frame: frame["A"], 1234, {}),
+        (1, lambda frame: frame, "numpy is the best", {}),
+    ]
+    for expected_queries, select, fill, extra in supported_cases:
+        with SqlCounter(query_count=expected_queries):
+            snow_result = np.full_like(select(snow_df), fill, **extra)
+            pandas_result = np.full_like(select(pandas_df), fill, **extra)
+            assert_array_equal(np.array(snow_result), np.array(pandas_result))
+
+    # Non-default options are not mapped and surface as a TypeError.
+    for unsupported in ({"subok": False}, {"order": "D"}, {"dtype": int}):
+        with pytest.raises(TypeError):
+            np.full_like(snow_df, 1234, **unsupported)
+
+
 def test_logical_operators():
     data = {
         "A": [0, 1, 2, 0, 1, 2, 0, 1, 2],