snowflakedb · sfc-gh-jkew · Oct 23, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 29, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -80,6 +80,9 @@
 - Added support for applying Snowpark Python function `snowflake_cortex_summarize`.
 - Added support for `DataFrame.attrs` and `Series.attrs`.
 - Added support for `DataFrame.style`.
+- Added support for `Index.to_numpy`.
+- Added support for `DataFrame.align` and `Series.align` for `axis=0`.
+- Added numpy compatibility support for `np.full_like`.
 
 #### Improvements
 

@@ -25,6 +25,9 @@ NumPy ufuncs called with Snowpark pandas arguments will ignore kwargs.
 |                             | dispatcher at all, and the normal NumPy behavior   |
 |                             | will occur.)                                       |
 +-----------------------------+----------------------------------------------------+
+| ``np.full_like``            | Mapped to pd.DataFrame(value, index=range(height), |
+|                             |                        columns=range(width))       |
+|                             | or pd.Series(value, index=range(length)) for 1-D   |
++-----------------------------+----------------------------------------------------+
 | ``np.may_share_memory``     | Returns False                                      |
 +-----------------------------+----------------------------------------------------+
 | ``np.add``                  | Mapped to df.__add__(df2)                          |

@@ -1,7 +1,7 @@
 #
 # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
 #
-from typing import Any, Optional, Union
+from typing import Any, Hashable, Optional, Union
 
 import modin.pandas as pd
 from modin.pandas.base import BasePandasDataset
@@ -112,6 +112,36 @@ def may_share_memory_mapper(a: Any, b: Any, max_work: Optional[int] = None) -> b
     return False
 
 
+def full_like_mapper(
+    a: Union[pd.DataFrame, pd.Series],
+    fill_value: Hashable,
+    dtype: Optional[Any] = None,
+    order: Optional[str] = "K",
+    subok: Optional[bool] = True,
+    shape: Optional[Union[int, tuple[int, ...]]] = None,
+) -> Union[pd.DataFrame, pd.Series]:
+    """
+    Build a ``pd.DataFrame`` or ``pd.Series`` filled with ``fill_value`` for ``np.full_like``.
+
+    Parameters
+    ----------
+    a : DataFrame or Series
+        Object whose ``shape`` is used when ``shape`` is not given.
+    fill_value : Hashable
+        Value placed in every cell of the result.
+    dtype : optional
+        Only ``None`` is handled.
+    order : str, optional
+        Only ``"K"`` is handled.
+    subok : bool, optional
+        Only truthy values are handled.
+    shape : int or tuple of int, optional
+        Shape of the result. An integer ``n`` is treated as ``(n,)`` and an
+        empty tuple as ``(1,)``. Defaults to ``a.shape``.
+
+    Returns
+    -------
+    DataFrame or Series
+        A DataFrame with ``range`` index and columns for a two-element shape,
+        or a Series with a ``range`` index for a one-element shape.
+        ``NotImplemented`` is returned for any other shape length or for an
+        unsupported ``dtype``/``order``/``subok`` argument.
+    """
+    # Function-scope import keeps this block self-contained.
+    import numbers
+
+    if not subok or order != "K" or dtype is not None:
+        return NotImplemented
+
+    if shape is None:
+        dims = a.shape
+    elif isinstance(shape, numbers.Integral):
+        # Covers Python ints and NumPy integer scalars such as np.int64,
+        # which would otherwise fail on the len() calls below.
+        dims = (shape,)
+    elif isinstance(shape, tuple) and len(shape) == 0:
+        # An empty shape is represented as a single-element Series.
+        dims = (1,)
+    else:
+        dims = shape
+
+    if len(dims) == 2:
+        height, width = dims  # type: ignore
+        return pd.DataFrame(fill_value, index=range(height), columns=range(width))
+    if len(dims) == 1:
+        return pd.Series(fill_value, index=range(dims[0]))
+    return NotImplemented
+
+
 # We also need to convert everything to booleans, since numpy will
 # do this implicitly on logical operators and pandas does not.
 def map_to_bools(inputs: Any) -> Any:
@@ -125,6 +155,7 @@ def map_to_bools(inputs: Any) -> Any:
 numpy_to_pandas_func_map = {
     "where": where_mapper,
     "may_share_memory": may_share_memory_mapper,
+    "full_like": full_like_mapper,
 }
 
 # Map that associates a numpy universal function name that operates on

@@ -57,6 +57,63 @@ def test_np_may_share_memory():
         assert not np.may_share_memory(snow_df_A, native_df_A)
 
 
+def test_full_like():
+    """Compare ``np.full_like`` on ``pd`` objects with the same call on ``native_pd`` objects."""
+    data = {
+        "A": [0, 1, 2, 0, 1, 2, 0, 1, 2],
+        "B": [True, False, True, True, False, True, False, False, False],
+        "C": ["a", "b", "c", "d", "a", "b", "c", "d", "e"],
+    }
+    snow_df = pd.DataFrame(data)
+    pandas_df = native_pd.DataFrame(data)
+
+    # No explicit shape: the result takes the shape of the input DataFrame.
+    with SqlCounter(query_count=2):
+        snow_result = np.full_like(snow_df, 1234)
+        pandas_result = np.full_like(pandas_df, 1234)
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+
+    # Explicit two-dimensional shape that differs from the input's shape.
+    with SqlCounter(query_count=1):
+        snow_result = np.full_like(snow_df, 1234, shape=(5, 3))
+        pandas_result = np.full_like(pandas_df, 1234, shape=(5, 3))
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+
+    # Single-column (Series) input, no explicit shape.
+    with SqlCounter(query_count=2):
+        snow_result = np.full_like(snow_df["A"], 1234)
+        pandas_result = np.full_like(pandas_df["A"], 1234)
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+
+    # String fill value.
+    with SqlCounter(query_count=1):
+        snow_result = np.full_like(snow_df, "numpy is the best")
+        pandas_result = np.full_like(pandas_df, "numpy is the best")
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+
+    # Empty-tuple shape.
+    with SqlCounter(query_count=1):
+        pandas_result = np.full_like(pandas_df, fill_value=4, shape=())
+        snow_result = np.full_like(snow_df, fill_value=4, shape=())
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+
+    # Integer shape.
+    with SqlCounter(query_count=1):
+        snow_result = np.full_like(snow_df, fill_value=4, shape=4)
+        pandas_result = np.full_like(pandas_df, fill_value=4, shape=4)
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+
+    # One-element tuple shape.
+    with SqlCounter(query_count=1):
+        snow_result = np.full_like(snow_df, fill_value=4, shape=(4,))
+        pandas_result = np.full_like(pandas_df, fill_value=4, shape=(4,))
+        assert_array_equal(np.array(snow_result), np.array(pandas_result))
+
+    # Unsupported arguments (an empty list shape, subok=False, an order other
+    # than "K", an explicit dtype) are expected to raise TypeError.
+    with pytest.raises(TypeError):
+        np.full_like(snow_df, 1234, shape=[])
+
+    with pytest.raises(TypeError):
+        np.full_like(snow_df, 1234, subok=False)
+
+    with pytest.raises(TypeError):
+        np.full_like(snow_df, 1234, order="D")
+
+    with pytest.raises(TypeError):
+        np.full_like(snow_df, 1234, dtype=int)
+
+
 def test_logical_operators():
     data = {
         "A": [0, 1, 2, 0, 1, 2, 0, 1, 2],