Skip to content

Commit

Permalink
Provide a numpy compatibility mapping to np.full_like
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-jkew committed Oct 23, 2024
1 parent cd8f160 commit c3cf7c6
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
- Added support for timedelta inputs in `value_counts()`.
- Added support for applying Snowpark Python function `snowflake_cortex_summarize`.
- Added support for `DataFrame`/`Series.attrs`
- Added numpy compatibility support for `np.full_like`

#### Improvements

Expand Down
3 changes: 3 additions & 0 deletions docs/source/modin/numpy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ NumPy ufuncs called with Snowpark pandas arguments will ignore kwargs.
| | dispatcher at all, and the normal NumPy behavior |
| | will occur.) |
+-----------------------------+----------------------------------------------------+
| ``np.full_like`` | Mapped to pd.DataFrame(value, index=range(height), |
| | columns=range(width)) |
+-----------------------------+----------------------------------------------------+
| ``np.may_share_memory`` | Returns False |
+-----------------------------+----------------------------------------------------+
| ``np.add`` | Mapped to df.__add__(df2) |
Expand Down
27 changes: 26 additions & 1 deletion src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
#
from typing import Any, Optional, Union
from typing import Any, Hashable, Optional, Union

import modin.pandas as pd
from modin.pandas.base import BasePandasDataset
Expand Down Expand Up @@ -112,6 +112,30 @@ def may_share_memory_mapper(a: Any, b: Any, max_work: Optional[int] = None) -> b
return False


def full_like_mapper(
    a: Union[pd.DataFrame, pd.Series],
    fill_value: Hashable,
    dtype: Optional[Any] = None,
    order: Optional[str] = "K",
    subok: Optional[bool] = True,
    shape: Optional[Union[int, tuple[int, ...]]] = None,
) -> Union[pd.DataFrame, pd.Series]:
    """
    Map ``np.full_like`` onto a pandas constructor call.

    Builds a new object filled with ``fill_value`` whose shape is ``shape``
    when given, otherwise ``a.shape``. The result is labelled with
    ``range(...)`` on each axis; the labels of ``a`` are not carried over.

    Args:
        a: Object whose ``shape`` is used when ``shape`` is not supplied.
        fill_value: Value placed in every cell of the result.
        dtype: Only ``None`` is supported.
        order: Only ``"K"`` is supported.
        subok: Only a truthy value is supported.
        shape: Optional override of the result shape, given as a single
            integer or as a sequence of integers.

    Returns:
        A ``pd.DataFrame`` for a two-dimensional shape, a ``pd.Series`` for a
        one-dimensional shape, or ``NotImplemented`` when an unsupported
        argument or any other dimensionality is requested.
    """
    if not subok or order != "K" or dtype is not None:
        return NotImplemented

    if shape is None:
        result_shape = a.shape
    else:
        # Test against None explicitly: an empty shape ``()`` is falsy but is
        # still an explicit request and must not fall back to ``a.shape``.
        try:
            result_shape = tuple(shape)
        except TypeError:
            # A bare integer is not iterable; treat it as a 1-D length.
            result_shape = (shape,)

    if len(result_shape) == 2:
        height, width = result_shape
        return pd.DataFrame(fill_value, index=range(height), columns=range(width))
    if len(result_shape) == 1:
        return pd.Series(fill_value, index=range(result_shape[0]))
    return NotImplemented


# We also need to convert everything to booleans, since numpy will
# do this implicitly on logical operators and pandas does not.
def map_to_bools(inputs: Any) -> Any:
Expand All @@ -125,6 +149,7 @@ def map_to_bools(inputs: Any) -> Any:
# Lookup table from a NumPy function name (e.g. "full_like" for
# np.full_like) to the mapper function in this module that handles it.
# NOTE(review): presumably consulted by the NumPy dispatch hook on the
# Snowpark pandas objects — confirm against the caller.
numpy_to_pandas_func_map = {
"where": where_mapper,
"may_share_memory": may_share_memory_mapper,
"full_like": full_like_mapper,
}

# Map that associates a numpy universal function name that operates on
Expand Down
39 changes: 39 additions & 0 deletions tests/integ/modin/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,45 @@ def test_np_may_share_memory():
assert not np.may_share_memory(snow_df_A, native_df_A)


def test_full_like():
    """Check that ``np.full_like`` on Snowpark pandas inputs matches native pandas."""
    columns = {
        "A": [0, 1, 2, 0, 1, 2, 0, 1, 2],
        "B": [True, False, True, True, False, True, False, False, False],
        "C": ["a", "b", "c", "d", "a", "b", "c", "d", "e"],
    }
    snow_frame = pd.DataFrame(columns)
    native_frame = native_pd.DataFrame(columns)

    # Result shape taken from the input DataFrame.
    with SqlCounter(query_count=2):
        actual = np.full_like(snow_frame, 1234)
        expected = np.full_like(native_frame, 1234)
        assert_array_equal(np.array(actual), np.array(expected))

    # Result shape given explicitly.
    with SqlCounter(query_count=1):
        actual = np.full_like(snow_frame, 1234, shape=(5, 3))
        expected = np.full_like(native_frame, 1234, shape=(5, 3))
        assert_array_equal(np.array(actual), np.array(expected))

    # One-dimensional input: a single column.
    with SqlCounter(query_count=2):
        actual = np.full_like(snow_frame["A"], 1234)
        expected = np.full_like(native_frame["A"], 1234)
        assert_array_equal(np.array(actual), np.array(expected))

    # Non-numeric fill value.
    with SqlCounter(query_count=1):
        actual = np.full_like(snow_frame, "numpy is the best")
        expected = np.full_like(native_frame, "numpy is the best")
        assert_array_equal(np.array(actual), np.array(expected))

    # Each of these keyword arguments is expected to raise TypeError.
    for unsupported in ({"subok": False}, {"order": "D"}, {"dtype": int}):
        with pytest.raises(TypeError):
            np.full_like(snow_frame, 1234, **unsupported)


def test_logical_operators():
data = {
"A": [0, 1, 2, 0, 1, 2, 0, 1, 2],
Expand Down

0 comments on commit c3cf7c6

Please sign in to comment.