From 300d2f9d9f6dd35805c3bcb6504766840cc8ea78 Mon Sep 17 00:00:00 2001
From: Hazem Elmeleegy
Date: Thu, 24 Oct 2024 01:57:22 -0700
Subject: [PATCH 1/7] SNOW-1707707: Add support for Index.to_numpy

---
 CHANGELOG.md | 1 +
 .../modin/supported/index_supported.rst | 2 +
 .../snowpark/modin/plugin/_internal/utils.py | 16 ++-
 .../modin/plugin/docstrings/series.py | 55 +++++++++
 .../snowpark/modin/plugin/extensions/index.py | 104 +++++++++++++++++-
 tests/integ/modin/test_to_numpy.py | 7 +-
 6 files changed, 180 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ce7fee72f4..7e1aece4b80 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,6 +38,7 @@
 - Added support for timedelta inputs in `value_counts()`.
 - Added support for applying Snowpark Python function `snowflake_cortex_summarize`.
 - Added support for `DataFrame`/`Series.attrs`
+- Added support for `Index.to_numpy`.
 
 #### Improvements
 
diff --git a/docs/source/modin/supported/index_supported.rst b/docs/source/modin/supported/index_supported.rst
index b67bd0e18a4..ffde21ee377 100644
--- a/docs/source/modin/supported/index_supported.rst
+++ b/docs/source/modin/supported/index_supported.rst
@@ -155,6 +155,8 @@ Methods
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``to_frame`` | Y | | |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
+| ``to_numpy`` | Y | | |
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``view`` | N | | |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``argsort`` | N | | |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
diff --git a/src/snowflake/snowpark/modin/plugin/_internal/utils.py b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
index 3f01e2d095e..1366a79918b 100644
--- a/src/snowflake/snowpark/modin/plugin/_internal/utils.py
+++ b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
@@ -14,7 +14,12 @@
 import pandas as native_pd
 from pandas._typing import AnyArrayLike, Scalar
 from pandas.core.dtypes.base import ExtensionDtype
-from pandas.core.dtypes.common import is_integer_dtype, is_object_dtype, is_scalar
+from pandas.core.dtypes.common import (
+    is_bool_dtype,
+    is_integer_dtype,
+    is_object_dtype,
+    is_scalar,
+)
 from pandas.core.dtypes.inference import is_list_like
 
 import snowflake.snowpark.modin.plugin._internal.statement_params_constants as STATEMENT_PARAMS
@@ -1557,7 +1562,14 @@ def convert_str_to_timedelta(x: str) -> pd.Timedelta:
     # example, an empty dataframe will be object dtype by default, or a variant, or a timestamp column with
     # multiple timezones. So here we cast the index to the index_type when ret = pd.Index(...) above cannot
     # figure out a non-object dtype. Note that the index_type is a logical type may not be 100% accurate.
-    if is_object_dtype(ret.dtype) and not is_object_dtype(index_type):
+    # We exclude the case where ret.dtype is object dtype while index_type is bool dtype. This is because
+    # casting None values to bool converts them to False, which results in a discrepancy with the pandas
+    # behavior.
+ if ( + is_object_dtype(ret.dtype) + and not is_object_dtype(index_type) + and not is_bool_dtype(index_type) + ): # TODO: SNOW-1657460 fix index_type for timestamp_tz try: ret = ret.astype(index_type) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 4b0ea8f748c..08091389706 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3383,6 +3383,61 @@ def to_numpy(): Returns ------- numpy.ndarray + + See Also + -------- + Series.array + Get the actual data stored within. + Index.array + Get the actual data stored within. + DataFrame.to_numpy + Similar method for DataFrame. + + Notes + ----- + The returned array will be the same up to equality (values equal in self will be equal in the returned array; likewise for values that are not equal). When self contains an ExtensionArray, the dtype may be different. For example, for a category-dtype Series, to_numpy() will return a NumPy array and the categorical dtype will be lost. + + For NumPy dtypes, this will be a reference to the actual data stored in this Series or Index (assuming copy=False). Modifying the result in place will modify the data stored in the Series or Index (not that we recommend doing that). + + For extension types, to_numpy() may require copying data and coercing the result to a NumPy type (possibly object), which may be expensive. When you need a no-copy reference to the underlying data, Series.array should be used instead. + + This table lays out the different dtypes and default return types of to_numpy() for various dtypes within pandas. + + --------------------------------------------------------- + | dtype | array type | + --------------------------------------------------------- + | category[T] | ndarray[T] (same dtype as input) | + --------------------------------------------------------- + | period | ndarray[object] (Periods) | + --------------------------------------------------------- + | interval | ndarray[object] (Intervals) | + --------------------------------------------------------- + | IntegerNA | ndarray[object] | + --------------------------------------------------------- + | datetime64[ns] | datetime64[ns] | + --------------------------------------------------------- + | datetime64[ns, tz] | ndarray[object] (Timestamps) | + --------------------------------------------------------- + + Examples + -------- + >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) # doctest: +SKIP + >>> ser.to_numpy() # doctest: +SKIP + array(['a', 'b', 'a'], dtype=object) + + Specify the dtype to control how datetime-aware data is represented. Use dtype=object to return an ndarray of pandas Timestamp objects, each with the correct tz. + + >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> ser.to_numpy(dtype=object) + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET')], + dtype=object) + + Or dtype='datetime64[ns]' to return an ndarray of native datetime64 values. The values are converted to UTC and the timezone info is dropped. 
+ + >>> ser.to_numpy(dtype="datetime64[ns]") + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'], + dtype='datetime64[ns]') """ tolist = to_list diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py index 1513fe86e08..a0e86c2d82a 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py @@ -29,12 +29,13 @@ import modin import numpy as np +import numpy.typing as npt import pandas as native_pd from modin.pandas import DataFrame, Series from modin.pandas.base import BasePandasDataset from pandas import get_option from pandas._libs import lib -from pandas._libs.lib import is_list_like, is_scalar +from pandas._libs.lib import is_list_like, is_scalar, no_default from pandas._typing import ArrayLike, DateTimeErrorChoices, DtypeObj, NaPosition, Scalar from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.base import ExtensionDtype @@ -2079,6 +2080,107 @@ def to_frame( return DataFrame(query_compiler=new_qc) + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: object = no_default, + **kwargs: Any, + ) -> np.ndarray: + """ + A NumPy ndarray representing the values in this Series or Index. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. + copy : bool, default False + This argument is ignored in Snowflake backend. The data from Snowflake + will be retrieved into the client, and a numpy array containing this + data will be returned. + na_value : Any, optional + The value to use for missing values. The default value depends + on `dtype` and the type of the array. + **kwargs + Additional keywords passed through to the ``to_numpy`` method + of the underlying array (for extension arrays). + + Returns + ------- + numpy.ndarray + + See Also + -------- + Series.array + Get the actual data stored within. + Index.array + Get the actual data stored within. + DataFrame.to_numpy + Similar method for DataFrame. + + Notes + ----- + The returned array will be the same up to equality (values equal in self will be equal in the returned array; likewise for values that are not equal). When self contains an ExtensionArray, the dtype may be different. For example, for a category-dtype Series, to_numpy() will return a NumPy array and the categorical dtype will be lost. + + For NumPy dtypes, this will be a reference to the actual data stored in this Series or Index (assuming copy=False). Modifying the result in place will modify the data stored in the Series or Index (not that we recommend doing that). + + For extension types, to_numpy() may require copying data and coercing the result to a NumPy type (possibly object), which may be expensive. When you need a no-copy reference to the underlying data, Series.array should be used instead. + + This table lays out the different dtypes and default return types of to_numpy() for various dtypes within pandas. 
+ + --------------------------------------------------------- + | dtype | array type | + --------------------------------------------------------- + | category[T] | ndarray[T] (same dtype as input) | + --------------------------------------------------------- + | period | ndarray[object] (Periods) | + --------------------------------------------------------- + | interval | ndarray[object] (Intervals) | + --------------------------------------------------------- + | IntegerNA | ndarray[object] | + --------------------------------------------------------- + | datetime64[ns] | datetime64[ns] | + --------------------------------------------------------- + | datetime64[ns, tz] | ndarray[object] (Timestamps) | + --------------------------------------------------------- + + Examples + -------- + >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) # doctest: +SKIP + >>> ser.to_numpy() # doctest: +SKIP + array(['a', 'b', 'a'], dtype=object) + + Specify the dtype to control how datetime-aware data is represented. Use dtype=object to return an ndarray of pandas Timestamp objects, each with the correct tz. + + >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> ser.to_numpy(dtype=object) + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET')], + dtype=object) + + Or dtype='datetime64[ns]' to return an ndarray of native datetime64 values. The values are converted to UTC and the timezone info is dropped. + + >>> ser.to_numpy(dtype="datetime64[ns]") + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'], + dtype='datetime64[ns]') + """ + if copy: + WarningMessage.ignored_argument( + operation="to_numpy", + argument="copy", + message="copy is ignored in Snowflake backend", + ) + # return self.to_pandas().array + return ( + self.to_pandas() + .to_numpy( + dtype=dtype, + na_value=na_value, + **kwargs, + ) + .flatten() + ) + @index_not_implemented() def fillna(self) -> None: """ diff --git a/tests/integ/modin/test_to_numpy.py b/tests/integ/modin/test_to_numpy.py index 33829d5c39b..0e61f595e4e 100644 --- a/tests/integ/modin/test_to_numpy.py +++ b/tests/integ/modin/test_to_numpy.py @@ -42,12 +42,15 @@ [datetime.datetime(2023, 1, 1), datetime.datetime(2023, 1, 1, 1, 2, 3), None], ], ) -@pytest.mark.parametrize("pandas_obj", ["DataFrame", "Series"]) +@pytest.mark.parametrize("pandas_obj", ["DataFrame", "Series", "Index"]) @pytest.mark.parametrize("func", ["to_numpy", "values"]) def test_to_numpy_basic(data, pandas_obj, func): if pandas_obj == "Series": df = pd.Series(data) native_df = native_pd.Series(data) + elif pandas_obj == "Index": + df = pd.Index(data) + native_df = native_pd.Index(data) else: df = pd.DataFrame([data, data]) native_df = native_pd.DataFrame([data, data]) @@ -109,7 +112,7 @@ def test_tz_aware_data_to_numpy(session): assert_array_equal(df.to_numpy(), expected_result) -@pytest.mark.parametrize("pandas_obj", ["DataFrame", "Series"]) +@pytest.mark.parametrize("pandas_obj", ["DataFrame", "Series", "Index"]) @sql_count_checker(query_count=1) def test_variant_data_to_numpy(pandas_obj): data = [ From b57bdd5aa9bc8e8a63d2e07679e895e34a1f7c01 Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Fri, 25 Oct 2024 11:21:09 -0700 Subject: [PATCH 2/7] fix tests --- .../modin/plugin/_internal/type_utils.py | 8 +++- .../modin/plugin/docstrings/series.py | 8 +--- .../snowpark/modin/plugin/extensions/index.py | 13 +++---- tests/integ/modin/frame/test_getitem.py | 39 +++++++++---------- tests/integ/modin/index/test_astype.py | 
12 +++--- 5 files changed, 39 insertions(+), 41 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py index 1b55d3af611..404e5ffd76a 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py @@ -384,8 +384,12 @@ def column_astype( new_col = cast(curr_col, LongType()) else: new_col = cast(curr_col, to_sf_type) - # astype should not have any effect on NULL values - return iff(curr_col.is_null(), None, new_col) + # astype should not have any effect on NULL values except when casting to boolean + if isinstance(to_sf_type, BooleanType): + # treat NULL values in boolean columns as False to match pandas behavior + return iff(curr_col.is_null(), pandas_lit(False), new_col) + else: + return iff(curr_col.is_null(), None, new_col) def is_astype_type_error( diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 08091389706..976d2b01c24 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3397,10 +3397,6 @@ def to_numpy(): ----- The returned array will be the same up to equality (values equal in self will be equal in the returned array; likewise for values that are not equal). When self contains an ExtensionArray, the dtype may be different. For example, for a category-dtype Series, to_numpy() will return a NumPy array and the categorical dtype will be lost. - For NumPy dtypes, this will be a reference to the actual data stored in this Series or Index (assuming copy=False). Modifying the result in place will modify the data stored in the Series or Index (not that we recommend doing that). - - For extension types, to_numpy() may require copying data and coercing the result to a NumPy type (possibly object), which may be expensive. When you need a no-copy reference to the underlying data, Series.array should be used instead. - This table lays out the different dtypes and default return types of to_numpy() for various dtypes within pandas. --------------------------------------------------------- @@ -3429,8 +3425,8 @@ def to_numpy(): >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) >>> ser.to_numpy(dtype=object) - array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), - Timestamp('2000-01-02 00:00:00+0100', tz='CET')], + array([Timestamp('2000-01-01 00:00:00+0100', tz='UTC+01:00'), + Timestamp('2000-01-02 00:00:00+0100', tz='UTC+01:00')], dtype=object) Or dtype='datetime64[ns]' to return an ndarray of native datetime64 values. The values are converted to UTC and the timezone info is dropped. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py index a0e86c2d82a..32316e5f08f 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py @@ -2122,10 +2122,6 @@ def to_numpy( ----- The returned array will be the same up to equality (values equal in self will be equal in the returned array; likewise for values that are not equal). When self contains an ExtensionArray, the dtype may be different. For example, for a category-dtype Series, to_numpy() will return a NumPy array and the categorical dtype will be lost. 
- For NumPy dtypes, this will be a reference to the actual data stored in this Series or Index (assuming copy=False). Modifying the result in place will modify the data stored in the Series or Index (not that we recommend doing that). - - For extension types, to_numpy() may require copying data and coercing the result to a NumPy type (possibly object), which may be expensive. When you need a no-copy reference to the underlying data, Series.array should be used instead. - This table lays out the different dtypes and default return types of to_numpy() for various dtypes within pandas. --------------------------------------------------------- @@ -2154,8 +2150,8 @@ def to_numpy( >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) >>> ser.to_numpy(dtype=object) - array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), - Timestamp('2000-01-02 00:00:00+0100', tz='CET')], + array([Timestamp('2000-01-01 00:00:00+0100', tz='UTC+01:00'), + Timestamp('2000-01-02 00:00:00+0100', tz='UTC+01:00')], dtype=object) Or dtype='datetime64[ns]' to return an ndarray of native datetime64 values. The values are converted to UTC and the timezone info is dropped. @@ -2170,7 +2166,6 @@ def to_numpy( argument="copy", message="copy is ignored in Snowflake backend", ) - # return self.to_pandas().array return ( self.to_pandas() .to_numpy( @@ -2703,6 +2698,10 @@ def __array__(self, dtype: Any = None) -> np.ndarray: """ The array interface, return the values. """ + # Ensure that the existing index dtype is preserved in the returned array + # if no other dtype is given. + if dtype is None: + dtype = self.dtype return self.to_pandas().__array__(dtype=dtype) def __repr__(self) -> str: diff --git a/tests/integ/modin/frame/test_getitem.py b/tests/integ/modin/frame/test_getitem.py index 03923701dd7..8abd281e541 100644 --- a/tests/integ/modin/frame/test_getitem.py +++ b/tests/integ/modin/frame/test_getitem.py @@ -36,31 +36,28 @@ np.array([], dtype=bool), ], ) +@sql_count_checker(query_count=1, join_count=1) def test_df_getitem_with_boolean_list_like( key, default_index_snowpark_pandas_df, default_index_native_df ): - # one added query to convert to native pandas and 1 added query for series initialization - with SqlCounter( - query_count=3 if isinstance(key, native_pd.Index) else 1, join_count=1 - ): - # df[boolean list-like key] is the same as df.loc[:, boolean list-like key] - if isinstance(key, native_pd.Index): - key = pd.Index(key) - - def get_helper(df): - if isinstance(df, pd.DataFrame): - return df[key] - # If pandas df, adjust the length of the df and key since boolean keys need to be the same length as the axis. - _key = try_convert_index_to_native(key) - _df = df.iloc[: len(key)] - _key = _key[: _df.shape[1]] - return _df[_key] + # df[boolean list-like key] is the same as df.loc[:, boolean list-like key] - eval_snowpark_pandas_result( - default_index_snowpark_pandas_df, - default_index_native_df, - get_helper, - ) + def get_helper(df, key): + if isinstance(df, pd.DataFrame): + if isinstance(key, native_pd.Index): + key = pd.Index(key) + return df[key] + # If pandas df, adjust the length of the df and key since boolean keys need to be the same length as the axis. 
+ _key = try_convert_index_to_native(key) + _df = df.iloc[: len(key)] + _key = _key[: _df.shape[1]] + return _df[_key] + + eval_snowpark_pandas_result( + default_index_snowpark_pandas_df, + default_index_native_df, + lambda df: get_helper(df, key), + ) @pytest.mark.parametrize( diff --git a/tests/integ/modin/index/test_astype.py b/tests/integ/modin/index/test_astype.py index b4578928825..76cebcec7d6 100644 --- a/tests/integ/modin/index/test_astype.py +++ b/tests/integ/modin/index/test_astype.py @@ -46,7 +46,7 @@ def test_index_astype(index, type): snow_index = pd.Index(index) with SqlCounter(query_count=1): - assert_index_equal(snow_index.astype(type), index.astype(type)) + assert_index_equal(snow_index.astype(type), index.astype(type), exact=False) @pytest.mark.parametrize( @@ -104,7 +104,9 @@ def test_index_astype_empty_index(from_type, to_type): native_index = native_pd.Index([], dtype=from_type) snow_index = pd.Index(native_index) with SqlCounter(query_count=1): - assert_index_equal(snow_index.astype(to_type), native_index.astype(to_type)) + assert_index_equal( + snow_index.astype(to_type), native_index.astype(to_type), exact=False + ) @pytest.mark.parametrize( @@ -166,8 +168,8 @@ def test_index_astype_bool_nan_none(): snow_index = pd.Index(native_index) with pytest.raises(AssertionError): assert_index_equal(snow_index.astype(bool), native_index.astype(bool)) - expected_result = native_pd.Index([True, True, True, False, False], dtype=bool) - assert_index_equal(snow_index.astype(bool), expected_result) + expected_result = native_pd.Index([True, True, True, None, None], dtype=bool) + assert_index_equal(snow_index.astype(bool), expected_result, exact=False) # Another case where this arises is when a float Index with "None" in it is used. pandas # converts None to NaN during Index creation and thus leads to this difference. @@ -180,7 +182,7 @@ def test_index_astype_bool_nan_none(): expected_result = native_pd.Index( [True, True, True, True, False, False], dtype=bool ) - assert_index_equal(snow_index.astype(bool), expected_result) + assert_index_equal(snow_index.astype(bool), expected_result, exact=False) @sql_count_checker(query_count=2) From f7eccfe8d300d35c8e4a8e3d0e48d7ef6b8ce6ba Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Fri, 25 Oct 2024 11:44:07 -0700 Subject: [PATCH 3/7] fix errors --- .../snowpark/modin/plugin/docstrings/series.py | 16 ++++++++-------- .../snowpark/modin/plugin/extensions/index.py | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 976d2b01c24..37169ede461 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3399,21 +3399,21 @@ def to_numpy(): This table lays out the different dtypes and default return types of to_numpy() for various dtypes within pandas. 
- --------------------------------------------------------- + +--------------------+----------------------------------+ | dtype | array type | - --------------------------------------------------------- + +--------------------+----------------------------------+ | category[T] | ndarray[T] (same dtype as input) | - --------------------------------------------------------- + +--------------------+----------------------------------+ | period | ndarray[object] (Periods) | - --------------------------------------------------------- + +--------------------+----------------------------------+ | interval | ndarray[object] (Intervals) | - --------------------------------------------------------- + +--------------------+----------------------------------+ | IntegerNA | ndarray[object] | - --------------------------------------------------------- + +--------------------+----------------------------------+ | datetime64[ns] | datetime64[ns] | - --------------------------------------------------------- + +--------------------+----------------------------------+ | datetime64[ns, tz] | ndarray[object] (Timestamps) | - --------------------------------------------------------- + +--------------------+----------------------------------+ Examples -------- diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py index 32316e5f08f..b7512b34e61 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py @@ -2124,21 +2124,21 @@ def to_numpy( This table lays out the different dtypes and default return types of to_numpy() for various dtypes within pandas. - --------------------------------------------------------- + +--------------------+----------------------------------+ | dtype | array type | - --------------------------------------------------------- + +--------------------+----------------------------------+ | category[T] | ndarray[T] (same dtype as input) | - --------------------------------------------------------- + +--------------------+----------------------------------+ | period | ndarray[object] (Periods) | - --------------------------------------------------------- + +--------------------+----------------------------------+ | interval | ndarray[object] (Intervals) | - --------------------------------------------------------- + +--------------------+----------------------------------+ | IntegerNA | ndarray[object] | - --------------------------------------------------------- + +--------------------+----------------------------------+ | datetime64[ns] | datetime64[ns] | - --------------------------------------------------------- + +--------------------+----------------------------------+ | datetime64[ns, tz] | ndarray[object] (Timestamps) | - --------------------------------------------------------- + +--------------------+----------------------------------+ Examples -------- From e60801afbbd8d7ba153051504d70c580d6985ed3 Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Fri, 25 Oct 2024 12:14:27 -0700 Subject: [PATCH 4/7] fix errors --- tests/integ/modin/index/test_astype.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/integ/modin/index/test_astype.py b/tests/integ/modin/index/test_astype.py index 76cebcec7d6..2b7030375ae 100644 --- a/tests/integ/modin/index/test_astype.py +++ b/tests/integ/modin/index/test_astype.py @@ -46,7 +46,7 @@ def test_index_astype(index, type): snow_index = pd.Index(index) with 
SqlCounter(query_count=1):
-        assert_index_equal(snow_index.astype(type), index.astype(type), exact=False)
+        assert_index_equal(snow_index.astype(type), index.astype(type))
 
 
 @pytest.mark.parametrize(
@@ -104,6 +104,8 @@ def test_index_astype_empty_index(from_type, to_type):
     native_index = native_pd.Index([], dtype=from_type)
     snow_index = pd.Index(native_index)
     with SqlCounter(query_count=1):
+        # exact=False is used because of a discrepancy in the "inferred_type" attribute
+        # when to_type is bool between Snowpark pandas (empty) and native pandas (bool).
         assert_index_equal(
             snow_index.astype(to_type), native_index.astype(to_type), exact=False
         )
@@ -168,8 +170,8 @@ def test_index_astype_bool_nan_none():
     snow_index = pd.Index(native_index)
     with pytest.raises(AssertionError):
         assert_index_equal(snow_index.astype(bool), native_index.astype(bool))
-    expected_result = native_pd.Index([True, True, True, None, None], dtype=bool)
-    assert_index_equal(snow_index.astype(bool), expected_result, exact=False)
+    expected_result = native_pd.Index([True, True, True, False, False], dtype=bool)
+    assert_index_equal(snow_index.astype(bool), expected_result)
 
     # Another case where this arises is when a float Index with "None" in it is used. pandas
     # converts None to NaN during Index creation and thus leads to this difference.
@@ -182,7 +184,7 @@ def test_index_astype_bool_nan_none():
     expected_result = native_pd.Index(
         [True, True, True, True, False, False], dtype=bool
     )
-    assert_index_equal(snow_index.astype(bool), expected_result, exact=False)
+    assert_index_equal(snow_index.astype(bool), expected_result)
 
 
 @sql_count_checker(query_count=2)

From 4f2784fa9e0f7be710988a035997c5feb502f543 Mon Sep 17 00:00:00 2001
From: Hazem Elmeleegy
Date: Fri, 25 Oct 2024 13:56:46 -0700
Subject: [PATCH 5/7] fix errors

---
 src/snowflake/snowpark/modin/plugin/_internal/type_utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py
index 404e5ffd76a..90f470d44f4 100644
--- a/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py
+++ b/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py
@@ -306,6 +306,9 @@ def column_astype(
     if to_dtype == np.object_:
         return to_variant(curr_col)
     if from_sf_type == to_sf_type:
+        if isinstance(to_sf_type, BooleanType):
+            # treat NULL values in boolean columns as False to match pandas behavior
+            return iff(curr_col.is_null(), False, curr_col)
         return curr_col
 
     if isinstance(to_sf_type, _IntegralType) and "int64" not in str(to_dtype).lower():
@@ -387,7 +390,7 @@ def column_astype(
     # astype should not have any effect on NULL values except when casting to boolean
     if isinstance(to_sf_type, BooleanType):
         # treat NULL values in boolean columns as False to match pandas behavior
-        return iff(curr_col.is_null(), pandas_lit(False), new_col)
+        return iff(curr_col.is_null(), False, new_col)
     else:
         return iff(curr_col.is_null(), None, new_col)

From 2207d65fa30b112de90199d9ca04dd4869d301df Mon Sep 17 00:00:00 2001
From: Hazem Elmeleegy
Date: Fri, 25 Oct 2024 14:48:29 -0700
Subject: [PATCH 6/7] fix errors

---
 src/snowflake/snowpark/modin/plugin/docstrings/series.py | 6 +++---
 src/snowflake/snowpark/modin/plugin/extensions/index.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py
index 
37169ede461..db96b18d4de 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3426,14 +3426,14 @@ def to_numpy(): >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) >>> ser.to_numpy(dtype=object) array([Timestamp('2000-01-01 00:00:00+0100', tz='UTC+01:00'), - Timestamp('2000-01-02 00:00:00+0100', tz='UTC+01:00')], - dtype=object) + Timestamp('2000-01-02 00:00:00+0100', tz='UTC+01:00')], + dtype=object) Or dtype='datetime64[ns]' to return an ndarray of native datetime64 values. The values are converted to UTC and the timezone info is dropped. >>> ser.to_numpy(dtype="datetime64[ns]") array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'], - dtype='datetime64[ns]') + dtype='datetime64[ns]') """ tolist = to_list diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py index b7512b34e61..e2e5d0c726c 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py @@ -2151,14 +2151,14 @@ def to_numpy( >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) >>> ser.to_numpy(dtype=object) array([Timestamp('2000-01-01 00:00:00+0100', tz='UTC+01:00'), - Timestamp('2000-01-02 00:00:00+0100', tz='UTC+01:00')], - dtype=object) + Timestamp('2000-01-02 00:00:00+0100', tz='UTC+01:00')], + dtype=object) Or dtype='datetime64[ns]' to return an ndarray of native datetime64 values. The values are converted to UTC and the timezone info is dropped. >>> ser.to_numpy(dtype="datetime64[ns]") array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'], - dtype='datetime64[ns]') + dtype='datetime64[ns]') """ if copy: WarningMessage.ignored_argument( From 547e42371e9e431c64d20562ade647360c9d2fbc Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Fri, 25 Oct 2024 19:15:07 -0700 Subject: [PATCH 7/7] fix errors --- tests/integ/modin/test_to_numpy.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/integ/modin/test_to_numpy.py b/tests/integ/modin/test_to_numpy.py index 0e61f595e4e..c6745b8b530 100644 --- a/tests/integ/modin/test_to_numpy.py +++ b/tests/integ/modin/test_to_numpy.py @@ -146,7 +146,7 @@ def test_variant_data_to_numpy(pandas_obj): @sql_count_checker(query_count=1) -def test_to_numpy_copy_true(caplog): +def test_to_numpy_copy_true_series(caplog): series = pd.Series([1]) caplog.clear() @@ -156,6 +156,17 @@ def test_to_numpy_copy_true(caplog): assert "has been ignored by Snowpark pandas" in caplog.text +@sql_count_checker(query_count=1) +def test_to_numpy_copy_true_index(caplog): + idx = pd.Index([1]) + + caplog.clear() + WarningMessage.printed_warnings.clear() + with caplog.at_level(logging.WARNING): + assert_array_equal(idx.to_numpy(copy=True), native_pd.Index([1]).to_numpy()) + assert "has been ignored by Snowpark pandas" in caplog.text + + @sql_count_checker(query_count=1) def test_to_numpy_warning(caplog): series = pd.Series([1])