v1.3.0

msberends · Jan 24, 2020 · 875ac59 · 875ac59
1 parent 1c4cb4d
commit 875ac59
Show file tree

Hide file tree

Showing 36 changed files with 277 additions and 123 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: cleaner
 Title: Fast and Easy Data Cleaning
-Version: 1.2.0
-Date: 2019-11-04
+Version: 1.3.0
+Date: 2020-01-24
 Authors@R: 
   person(
     given = c("Matthijs", "S."),
@@ -29,4 +29,4 @@ BugReports: https://github.com/msberends/cleaner/issues
 License: GPL-2
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 6.1.1
+RoxygenNote: 7.0.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -37,6 +37,7 @@ S3method(max,percentage)
 S3method(mean,currency)
 S3method(mean,percentage)
 S3method(median,currency)
+S3method(median,percentage)
 S3method(min,currency)
 S3method(min,percentage)
 S3method(pillar_shaft,currency)
@@ -58,7 +59,9 @@ export(clean_Date)
 export(clean_POSIXct)
 export(clean_character)
 export(clean_currency)
+export(clean_double)
 export(clean_factor)
+export(clean_integer)
 export(clean_logical)
 export(clean_numeric)
 export(clean_percentage)
@@ -113,6 +116,7 @@ exportMethods(max.percentage)
 exportMethods(mean.currency)
 exportMethods(mean.percentage)
 exportMethods(median.currency)
+exportMethods(median.percentage)
 exportMethods(min.currency)
 exportMethods(min.percentage)
 exportMethods(plot.freq)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# cleaner 1.3.0
+
+* Added functions `clean_double()` and `clean_integer()`
+* Added a `median()` method for percentages
+* Fixed a bug where `NA` in percentages would not be formatted correctly
+* Fixed a bug in frequency tables where the number of digits used for percentages could sometimes become excessively large
+
 # cleaner 1.2.0
 
 * **DUE TO CRAN POLICY: RENAMED TO PACKAGE TO `cleaner`**

diff --git a/R/clean.R b/R/clean.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@@ -44,7 +44,7 @@
 #' \itemize{
 #'   \item{\code{clean_logical()}:\cr}{Use parameters \code{true} and \code{false} to match values using case-insensitive regular expressions (\link[base]{regex}). Unmatched values are considered \code{NA}. At default, values are matched with \code{\link{regex_true}} and \code{\link{regex_false}}. This allows support for values "Yes" and "No" in the following languages: Arabic, Bengali, Chinese (Mandarin), Dutch, English, French, German, Hindi, Indonesian, Japanese, Malay, Portuguese, Russian, Spanish, Telugu, Turkish and Urdu. Use parameter \code{na} to override values as \code{NA} that would else be matched with \code{true} or \code{false}. See Examples.}
 #'   \item{\code{clean_factor()}:\cr}{Use parameter \code{levels} to set new factor levels. They can be case-insensitive regular expressions to match existing values of \code{x}. For matching, new values for \code{levels} are internally temporary sorted descending on text length. See Examples.}
-#'   \item{\code{clean_numeric()} and \code{clean_character()}:\cr}{Use parameter \code{remove} to match values that must be removed from the input, using regular expressions (\link[base]{regex}). In case of \code{clean_numeric()}, comma's will be read as dots and only the last dot will be kept. Function \code{clean_character()} will keep middle spaces at default. See Examples.}
+#'   \item{\code{clean_numeric()}, \code{clean_double()}, \code{clean_integer()} and \code{clean_character()}:\cr}{Use parameter \code{remove} to match values that must be removed from the input, using regular expressions (\link[base]{regex}). In case of \code{clean_numeric()}, \code{clean_double()} and \code{clean_integer()}, commas will be read as dots and only the last dot will be kept. Function \code{clean_character()} will keep middle spaces by default. See Examples.}
 #'   \item{\code{clean_percentage()}:\cr}{This new class works like \code{clean_numeric()}, but transforms it with \code{\link{as.percentage}}, which will retain the original values, but will print them as percentages. See Examples.} 
 #'   \item{\code{clean_currency()}:\cr}{This new class works like \code{clean_numeric()}, but transforms it with \code{\link{as.currency}}. The currency symbol is guessed based on the most traded currencies by value (see Source): the United States dollar, Euro, Japanese yen, Pound sterling, Swiss franc, Renminbi, Swedish krona, Mexican peso, South Korean won, Turkish lira, Russian ruble, Indian rupee and the South African rand. See Examples.}
 #'   \item{\code{clean_Date()}:\cr}{Use parameter \code{format} to define a date format, or leave it empty to have the format guessed. Use \code{"Excel"} to read values as Microsoft Excel dates. The \code{format} parameter will be evaluated with \code{\link{format_datetime}}, which means that a format like \code{"d-mmm-yy"} with be translated internally to \code{"\%e-\%b-\%y"} for convenience. See Examples.}
@@ -57,7 +57,8 @@
 #' \itemize{
 #'   \item{\code{clean_logical()}: class \code{logical}}
 #'   \item{\code{clean_factor()}: class \code{factor}}
-#'   \item{\code{clean_numeric()}: class \code{numeric}}
+#'   \item{\code{clean_numeric()} and \code{clean_double()}: class \code{numeric}}
+#'   \item{\code{clean_integer()}: class \code{integer}}
 #'   \item{\code{clean_character()}: class \code{character}}
 #'   \item{\code{clean_percentage()}: class \code{percentage}}
 #'   \item{\code{clean_currency()}: class \code{currency}}
@@ -88,8 +89,8 @@
 #' clean_Date(c("1 Oct 13", "October 1st 2012")) # could not be fitted in 1 format
 #' clean_Date(c("1 Oct 13", "October 1st 2012"), guess_each = TRUE)
 #' 
-#' clean_POSIXct("Created log on 2019/02/11 11:23 by user Joe")
-#' clean_POSIXct("Created log on 2019.02.11 11:23 by user Joe", tz = "UTC")
+#' clean_POSIXct("Created log on 2020/02/11 11:23 by user Joe")
+#' clean_POSIXct("Created log on 2020.02.11 11:23 by user Joe", tz = "UTC")
 #' 
 #' clean_numeric("qwerty123456")
 #' clean_numeric("Positive (0.143)")
@@ -107,7 +108,7 @@
 #'  
 #' clean("12 06 2012")
 #' 
-#' df <- data.frame(A = c("2 Apr 2016", "5 Feb 2019"), 
+#' df <- data.frame(A = c("2 Apr 2016", "5 Feb 2020"), 
 #'                  B = c("yes", "no"),
 #'                  C = c("Total of -12.3%", "Total of +4.5%"),
 #'                  D = c("Marker: 0.4513 mmol/l", "Marker: 0.2732 mmol/l"))
@@ -245,6 +246,16 @@ clean_numeric <- function(x, remove = "[^0-9.,-]", fixed = FALSE) {
   x_numeric
 }
 
+# clean_double() is an alias: it is bound to the same function object as clean_numeric()
+#' @rdname clean
+#' @export
+clean_double <- clean_numeric
+
+#' @rdname clean
+#' @export
+clean_integer <- function(x, remove = "[^0-9.,-]", fixed = FALSE) {
+  # clean to a numeric vector first, then coerce the result to integer
+  cleaned <- clean_numeric(x = x, remove = remove, fixed = fixed)
+  as.integer(cleaned)
+}
+
 #' @rdname clean
 #' @export
 clean_character <- function(x, remove = "[^a-z \t\r\n]", fixed = FALSE, ignore.case = TRUE, trim = TRUE) {

diff --git a/R/currency.R b/R/currency.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #

diff --git a/R/data.R b/R/data.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #

diff --git a/R/format_datetime.R b/R/format_datetime.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@@ -19,18 +19,21 @@
 
 #' Readable date format to POSIX
 #' 
-#' Use this function to transform generic date/time info writing (dd-mm-yyyy) to POSIX standardised format (\%d-\%m-\%Y), see Examples.
+#' Use this function to transform a generic date/time notation (dd-mm-yyyy) into the POSIX standardised format (\%d-\%m-\%Y), see Examples.
 #' @param format the format that needs to be transformed
 #' @export
 #' @return A \code{character} string (a POSIX standardised format)
 #' @examples 
-#' format_datetime("yyyy/mm/dd")
+#' format_datetime("yyyy-mm-dd")
 #' 
 #' # Very hard to remember all these characters:
 #' format(Sys.time(), "%a %b %d %Y %X")
 #' 
 #' # Easy to remember and write the same as above:
 #' format(Sys.time(), format_datetime("ddd mmm dd yyyy HH:MM:ss"))
+#' 
+#' # seconds since the Epoch, 1970-01-01 00:00:00
+#' format(Sys.time(), format_datetime("epoch"))
 format_datetime <- function(format) {
   if (!any(grepl('%', format, fixed = TRUE))) {
     # first months and minutes, after that everything is caseINsensitive
@@ -64,6 +67,7 @@ format_datetime <- function(format) {
 
     # seconds since the Epoch, 1970-01-01 00:00:00
     format <- gsub('unix', '%s', format, fixed = TRUE)
+    format <- gsub('epoc%k', '%s', format, fixed = TRUE) # the h has become %k
 
     # equivalent to %Y-%m-%d (the ISO 8601 date format)
     format <- gsub('iso', '%F', format, fixed = TRUE)

diff --git a/R/freq.R b/R/freq.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@@ -625,15 +625,14 @@ print.freq <- function(x,
   opt <- attr(x, "opt")
   if (is.null(opt)) {
     # selection of frequency table, return original class
-    class(x) <- class(x)[!class(x) %in% c("freq", "frequency_tbl")]
+    class(x) <- class(x)[!class(x) == "freq"]
     print(x)
     return(invisible())
   }
 
   if (!is.null(opt$format)) {
     is.Date <- function(x) inherits(x, c("Date", "POSIXct"))
     x$item <- format(x$item, format = ifelse(is.Date(x$item), format_datetime(opt$format), opt$format))
-    quote <- FALSE
   }
 
   opt$header_txt <- header(x)
@@ -705,12 +704,13 @@ print.freq <- function(x,
     }
     return(invisible())
   }
+
+  if (is.null(opt$digits)) {
+    opt$digits <- 2
+  }
 
   if (isTRUE(opt$header == TRUE)) {
     if (!is.null(opt$header_txt)) {
-      if (is.null(opt$digits)) {
-        opt$digits <- 2
-      }
       cat(format_header(x, digits = opt$digits, markdown = (opt$tbl_format == "markdown"),
                         decimal.mark = decimal.mark, big.mark = big.mark))
     }

diff --git a/R/helpers.R b/R/helpers.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #

diff --git a/R/percentage.R b/R/percentage.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@@ -125,7 +125,8 @@ format.percentage <- function(x, digits = NULL, ...) {
     digits <- getdecimalplaces(x)
   }
   x_formatted <- format(as.double(x) * 100, scientific = FALSE, digits = digits, nsmall = digits, ...)
-  x_formatted[!is.na(x)] <- paste0(x_formatted[!is.na(x)], "%")
+  x_formatted <- paste0(x_formatted, "%")
+  # Blank out missing values only. Matching the raw values against a
+  # digits-only pattern would also turn negative percentages and values
+  # printed in scientific notation (e.g. 1e-05) into NA.
+  x_formatted[is.na(x)] <- NA_character_
   x_formatted
 }
 
@@ -157,6 +158,13 @@ mean.percentage <- function(x, ...) {
   as.percentage(mean(as.double(x), ...))
 }
 
+#' @noRd
+#' @exportMethod median.percentage
+#' @export
+median.percentage <- function(x, ...) {
+  # take the median of the underlying doubles, then convert back to a percentage
+  med <- median(as.double(x), ...)
+  as.percentage(med)
+}
+
 #' @noRd
 #' @exportMethod summary.percentage
 #' @export

diff --git a/R/regex_true_false.R b/R/regex_true_false.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #

diff --git a/data_raw/unclean.R b/data_raw/unclean.R
@@ -6,7 +6,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #

diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/authors.html b/docs/authors.html
diff --git a/docs/extra.css b/docs/extra.css
@@ -7,7 +7,7 @@
 # https://github.com/msberends/cleaner                                 #
 #                                                                      #
 # LICENCE                                                              #
-# (c) 2019 Berends MS (m.s.berends@umcg.nl)                            #
+# (c) 2020 Berends MS (m.s.berends@umcg.nl)                            #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #

diff --git a/docs/index.html b/docs/index.html