From 46ab43110f0659571a0a9e3bfc11ac56d84f4215 Mon Sep 17 00:00:00 2001
From: Devanshu Kumar Singh
 <43195822+Devanshusisodiya@users.noreply.github.com>
Date: Sun, 11 Feb 2024 15:29:04 +0000
Subject: [PATCH 1/7] fix for warning if column is multi categorical in
 h2o.cor()

---
 h2o-py/h2o/frame.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/h2o-py/h2o/frame.py b/h2o-py/h2o/frame.py
index 59dc1d053547..c13ea7c1d89d 100644
--- a/h2o-py/h2o/frame.py
+++ b/h2o-py/h2o/frame.py
@@ -3208,8 +3208,21 @@ def cor(self, y=None, na_rm=False, use=None, method="Pearson"):
         assert_is_type(y, H2OFrame, None)
         assert_is_type(na_rm, bool)
         assert_is_type(use, None, "everything", "all.obs", "complete.obs")
+        y_categorical = any(y[col_name].isfactor() for col_name in y)
+        
         if y is None:
             y = self
+
+        if y_categorical:
+            num_unique_levels = {col: self[col].nlevels()[0] for col in y}
+            multi_categorical = any(num_levels > 2 for num_levels in num_unique_levels.values())
+
+        if multi_categorical:
+            import warnings
+            for col, nlevel in num_unique_levels.items():
+                if nlevel > 2:
+                    warnings.warn("Column {} contains {} levels. Only numerical and categorical columns are supported.".format(col, nlevel))
+
         if use is None: use = "complete.obs" if na_rm else "everything"
         if self.nrow == 1 or (self.ncol == 1 and y.ncol == 1): return ExprNode("cor", self, y, use, method)._eager_scalar()
         return H2OFrame._expr(expr=ExprNode("cor", self, y, use, method))._frame()

From b328502e864f0e0a7ef83f25192bc5c62ca3e240 Mon Sep 17 00:00:00 2001
From: Devanshu Kumar Singh
 <43195822+Devanshusisodiya@users.noreply.github.com>
Date: Mon, 12 Feb 2024 03:38:07 +0000
Subject: [PATCH 2/7] added fix for warning if column is multi categorical in R

---
 h2o-r/h2o-package/R/frame.R | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/h2o-r/h2o-package/R/frame.R b/h2o-r/h2o-package/R/frame.R
index c1e46f9ac5f0..951706c226c6 100644
--- a/h2o-r/h2o-package/R/frame.R
+++ b/h2o-r/h2o-package/R/frame.R
@@ -2902,6 +2902,13 @@ h2o.cor <- function(x, y=NULL,na.rm = FALSE, use, method="Pearson"){
   if (is.null(method) || is.na(method)) {
     stop("Correlation method must be specified.")
   }
+
+  # Check for categorical columns in x and y
+  x_categorical <- any(h2o.isfactor(x))
+  y_categorical <- any(h2o.isfactor(y))
+
+  if ((x_categorical && length(unique(h2o.levels(x))) > 2) || (y_categorical && length(unique(h2o.levels(y))) > 2)) {
+    warning("Column x or y contains more than 2 levels. Only numerical and categorical columns are supported.")
   
   # Eager, mostly to match prior semantics but no real reason it need to be
   expr <- .newExpr("cor",x,y,.quote(use), .quote(method))

From af62fa4a23bfbcfc4e5a18ed6dbd36655529ba0e Mon Sep 17 00:00:00 2001
From: Devanshusisodiya <devanshukumar45@gmail.com>
Date: Tue, 13 Feb 2024 15:21:29 +0530
Subject: [PATCH 3/7] fix for issue #15947

---
 h2o-py/h2o/frame.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/h2o-py/h2o/frame.py b/h2o-py/h2o/frame.py
index c13ea7c1d89d..a86b62ec5adb 100644
--- a/h2o-py/h2o/frame.py
+++ b/h2o-py/h2o/frame.py
@@ -146,6 +146,12 @@ def _upload_python_object(self, python_obj, destination_frame=None, header=0, se
         if not column_names:
             column_names = col_header
 
+        if skipped_columns:
+            column_names = [column_name for column_name in column_names[:len(column_names)-len(skipped_columns)]]
+
+        if not column_names:
+            raise H2OValueError("skipped_columns cannot contain all columns")
+
         # create a temporary file that will be written to
         tmp_handle, tmp_path = tempfile.mkstemp(suffix=".csv")
         tmp_file = os.fdopen(tmp_handle, 'w', **H2OFrame.__fdopen_kwargs)

From d012d77a14f865402c60828051826c08ae399f86 Mon Sep 17 00:00:00 2001
From: Devanshusisodiya <devanshukumar45@gmail.com>
Date: Tue, 13 Feb 2024 15:39:21 +0530
Subject: [PATCH 4/7] wrote test for fix #15947

---
 .../Data_Manipulation/pyunit_h2oH2OFrame.py          | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame.py b/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame.py
index b9f101dda2a7..7dfbe120c831 100644
--- a/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame.py
+++ b/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame.py
@@ -125,10 +125,16 @@ def H2OFrame_from_H2OFrame():
     assert dupl4.columns == ["n1", "s1"]
 
 
-def H2OFrame_skipped_columns_is_BUGGY():
+def H2OFrame_skipped_columns():
     try:
-        h2o.H2OFrame(data, skipped_columns=[1])
-        assert False, "skipped_columns handling may be fixed now"  # parse_setup is absolutely weird, with only half parameters passed to build the ParseSetup, and then a bunch of logic done locally, that's why it's buggy: see issue https://github.com/h2oai/h2o-3/issues/15947 
+        fr = h2o.H2OFrame([
+            [1, "a",  1],
+            [2, "b",  0],
+            [3,  "", 1],
+            ]
+        )
+        skipped_columns_frame = h2o.H2OFrame(data, skipped_columns=[1])
+        assert skipped_columns_frame == fr # parse_setup is absolutely weird, with only half parameters passed to build the ParseSetup, and then a bunch of logic done locally, that's why it's buggy: see issue https://github.com/h2oai/h2o-3/issues/15947 
     except ValueError as e:
         assert "length of col_names should be equal to the number of columns parsed: 4 vs 3" in str(e)
 

From 68e7d13c032f63155caff7c2fff3c9734caa1498 Mon Sep 17 00:00:00 2001
From: Devanshusisodiya <devanshukumar45@gmail.com>
Date: Tue, 13 Feb 2024 15:44:19 +0530
Subject: [PATCH 5/7] undo commit

---
 .../Data_Manipulation/pyunit_h2oH2OFrame.py          | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame.py b/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame.py
index 7dfbe120c831..b9f101dda2a7 100644
--- a/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame.py
+++ b/h2o-py/tests/testdir_apis/Data_Manipulation/pyunit_h2oH2OFrame.py
@@ -125,16 +125,10 @@ def H2OFrame_from_H2OFrame():
     assert dupl4.columns == ["n1", "s1"]
 
 
-def H2OFrame_skipped_columns():
+def H2OFrame_skipped_columns_is_BUGGY():
     try:
-        fr = h2o.H2OFrame([
-            [1, "a",  1],
-            [2, "b",  0],
-            [3,  "", 1],
-            ]
-        )
-        skipped_columns_frame = h2o.H2OFrame(data, skipped_columns=[1])
-        assert skipped_columns_frame == fr # parse_setup is absolutely weird, with only half parameters passed to build the ParseSetup, and then a bunch of logic done locally, that's why it's buggy: see issue https://github.com/h2oai/h2o-3/issues/15947 
+        h2o.H2OFrame(data, skipped_columns=[1])
+        assert False, "skipped_columns handling may be fixed now"  # parse_setup is absolutely weird, with only half parameters passed to build the ParseSetup, and then a bunch of logic done locally, that's why it's buggy: see issue https://github.com/h2oai/h2o-3/issues/15947 
     except ValueError as e:
         assert "length of col_names should be equal to the number of columns parsed: 4 vs 3" in str(e)
 

From 2358b35ec02b7f322eb82f5139b1fe705a1fb2b5 Mon Sep 17 00:00:00 2001
From: Devanshusisodiya <devanshukumar45@gmail.com>
Date: Tue, 13 Feb 2024 15:49:26 +0530
Subject: [PATCH 6/7] undo changes

---
 h2o-py/h2o/frame.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/h2o-py/h2o/frame.py b/h2o-py/h2o/frame.py
index a86b62ec5adb..59dc1d053547 100644
--- a/h2o-py/h2o/frame.py
+++ b/h2o-py/h2o/frame.py
@@ -146,12 +146,6 @@ def _upload_python_object(self, python_obj, destination_frame=None, header=0, se
         if not column_names:
             column_names = col_header
 
-        if skipped_columns:
-            column_names = [column_name for column_name in column_names[:len(column_names)-len(skipped_columns)]]
-
-        if not column_names:
-            raise H2OValueError("skipped_columns cannot contain all columns")
-
         # create a temporary file that will be written to
         tmp_handle, tmp_path = tempfile.mkstemp(suffix=".csv")
         tmp_file = os.fdopen(tmp_handle, 'w', **H2OFrame.__fdopen_kwargs)
@@ -3214,21 +3208,8 @@ def cor(self, y=None, na_rm=False, use=None, method="Pearson"):
         assert_is_type(y, H2OFrame, None)
         assert_is_type(na_rm, bool)
         assert_is_type(use, None, "everything", "all.obs", "complete.obs")
-        y_categorical = any(y[col_name].isfactor() for col_name in y)
-        
         if y is None:
             y = self
-
-        if y_categorical:
-            num_unique_levels = {col: self[col].nlevels()[0] for col in y}
-            multi_categorical = any(num_levels > 2 for num_levels in num_unique_levels.values())
-
-        if multi_categorical:
-            import warnings
-            for col, nlevel in num_unique_levels.items():
-                if nlevel > 2:
-                    warnings.warn("Column {} contains {} levels. Only numerical and categorical columns are supported.".format(col, nlevel))
-
         if use is None: use = "complete.obs" if na_rm else "everything"
         if self.nrow == 1 or (self.ncol == 1 and y.ncol == 1): return ExprNode("cor", self, y, use, method)._eager_scalar()
         return H2OFrame._expr(expr=ExprNode("cor", self, y, use, method))._frame()

From 6d2d82aa10f644b2504a5b9b414404e507a85f9b Mon Sep 17 00:00:00 2001
From: Devanshusisodiya <devanshukumar45@gmail.com>
Date: Tue, 13 Feb 2024 15:59:49 +0530
Subject: [PATCH 7/7] undo changes

---
 h2o-r/h2o-package/R/frame.R | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/h2o-r/h2o-package/R/frame.R b/h2o-r/h2o-package/R/frame.R
index 951706c226c6..b8f3c5b67d72 100644
--- a/h2o-r/h2o-package/R/frame.R
+++ b/h2o-r/h2o-package/R/frame.R
@@ -2903,13 +2903,6 @@ h2o.cor <- function(x, y=NULL,na.rm = FALSE, use, method="Pearson"){
     stop("Correlation method must be specified.")
   }
 
-  # Check for categorical columns in x and y
-  x_categorical <- any(h2o.isfactor(x))
-  y_categorical <- any(h2o.isfactor(y))
-
-  if ((x_categorical && length(unique(h2o.levels(x))) > 2) || (y_categorical && length(unique(h2o.levels(y))) > 2)) {
-    warning("Column x or y contains more than 2 levels. Only numerical and categorical columns are supported.")
-  
   # Eager, mostly to match prior semantics but no real reason it need to be
   expr <- .newExpr("cor",x,y,.quote(use), .quote(method))
   if( (nrow(x)==1L || (ncol(x)==1L && ncol(y)==1L)) ) .eval.scalar(expr)