diff --git a/h2o-py/h2o/frame.py b/h2o-py/h2o/frame.py index 235a69b6c302..e026cb5e9203 100644 --- a/h2o-py/h2o/frame.py +++ b/h2o-py/h2o/frame.py @@ -3182,9 +3182,16 @@ def cor(self, y=None, na_rm=False, use=None, method="Pearson"): assert_is_type(y, H2OFrame, None) assert_is_type(na_rm, bool) assert_is_type(use, None, "everything", "all.obs", "complete.obs") - if y is None: - y = self - if use is None: use = "complete.obs" if na_rm else "everything" + y_categorical = any(self.types[col_name] == "enum" for col_name in y) + + if y_categorical: + num_unique_levels = {col: len(self[col].levels()) for col in y} + multi_categorical = any(num_levels > 2 for num_levels in num_unique_levels.values()) + + if multi_categorical: + import warnings + warnings.warn("NA") + if self.nrow == 1 or (self.ncol == 1 and y.ncol == 1): return ExprNode("cor", self, y, use, method)._eager_scalar() return H2OFrame._expr(expr=ExprNode("cor", self, y, use, method))._frame() diff --git a/h2o-r/h2o-package/R/frame.R b/h2o-r/h2o-package/R/frame.R index e90e1e564029..4f627d183847 100644 --- a/h2o-r/h2o-package/R/frame.R +++ b/h2o-r/h2o-package/R/frame.R @@ -2880,25 +2880,36 @@ var <- function(x, y = NULL, na.rm = FALSE, use) { #' cor(prostate$AGE) #' } #' @export -h2o.cor <- function(x, y=NULL,na.rm = FALSE, use, method="Pearson"){ +h2o.cor <- function(x, y = NULL, na.rm = FALSE, use, method = "Pearson") { # Eager, mostly to match prior semantics but no real reason it need to be - if( is.null(y) ){ + if (is.null(y)) { y <- x } - if(missing(use)) { + if (missing(use)) { if (na.rm) use <- "complete.obs" else use <- "everything" } if (is.null(method) || is.na(method)) { stop("Correlation method must be specified.") } + + # Check for categorical columns in x and y + x_categorical <- any(h2o.isfactor(x)) + y_categorical <- any(h2o.isfactor(y)) + if ((x_categorical && length(unique(h2o.levels(x))) > 2) || (y_categorical && length(unique(h2o.levels(y))) > 2)) { + warning("NA") + } + + # Eager, mostly to match prior semantics but no real reason it need to be - expr <- .newExpr("cor",x,y,.quote(use), .quote(method)) - if( (nrow(x)==1L || (ncol(x)==1L && ncol(y)==1L)) ) .eval.scalar(expr) - else .fetch.data(expr,ncol(x)) + expr <- .newExpr("cor", x, y, .quote(use), .quote(method)) + if ((nrow(x) == 1L || (ncol(x) == 1L && ncol(y) == 1L))) .eval.scalar(expr) + else .fetch.data(expr, ncol(x)) } + + #' #' Compute a pairwise distance measure between all rows of two numeric H2OFrames. #'