From 57c16752f11fcdc5238f3641b51b62497d9c4d4d Mon Sep 17 00:00:00 2001 From: karlropkins Date: Sun, 10 Dec 2023 12:29:11 +0000 Subject: [PATCH 1/6] sp_plot_species species order forced --- DESCRIPTION | 4 ++-- NEWS.md | 4 ++++ R/sp.plot.R | 7 ++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b2fe924..20d8b38 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: respeciate Title: Speciation profiles for gases and aerosols -Version: 0.2.6 -Date: 2023-12-01 +Version: 0.2.7 +Date: 2023-12-10 Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for gases and particles. More details in Simon et al (2010) . diff --git a/NEWS.md b/NEWS.md index 5c2ece5..5f4c7a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # Version 0.0 - Release Notes +* [0.2.7] + * released 2023-12-10 + * sp_plot_species update; species order forced + * [0.2.6] * released 2023-12-01 * sp_build_rsp_x update; added further value handling diff --git a/R/sp.plot.R b/R/sp.plot.R index 7192a71..339f001 100644 --- a/R/sp.plot.R +++ b/R/sp.plot.R @@ -364,8 +364,9 @@ sp_plot_species <- function(x, id, multi.species = "group", #################################### species <- unique(x$SPECIES_NAME) #should think about other naming options??? + x$SPECIES_NAME <- factor(x$SPECIES_NAME, levels=species) -#ignoreing order at moment +#ignoring option to re-order at moment #order largest to smallest ############################# @@ -425,6 +426,10 @@ sp_plot_species <- function(x, id, multi.species = "group", # format using a supplied function??? x$x.id <- as.numeric(factor(x$PROFILE_CODE)) + ############################## + #species alignment + + p1.ls <- list(x= .value~x.id, data=x, ylab="Measurement", xlab="Sample [index]", From 0a45a60f818e79e2f514ca9ad78e3e746795885a Mon Sep 17 00:00:00 2001 From: karlropkins Date: Sun, 10 Dec 2023 13:34:21 +0000 Subject: [PATCH 2/6] package build --- R/xxx.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/xxx.R b/R/xxx.R index 4b824a3..48b7c29 100644 --- a/R/xxx.R +++ b/R/xxx.R @@ -14,7 +14,7 @@ # (not keeping unless we can get it to work better) -utils::globalVariables(c("sysdata", ".SD", "ans", +utils::globalVariables(c("sysdata", ".SD", "ans", "control", "PROFILE_CODE", "PROFILE_NAME", "PROFILE_TYPE", "SPECIES_ID", "SPECIES_NAME", "SPEC_MW", "WEIGHT_PERCENT", ".", ".value")) From 5f62799ab22b67051cbc76b9b10c1ed060443d2e Mon Sep 17 00:00:00 2001 From: karlropkins Date: Sat, 16 Dec 2023 17:01:04 +0000 Subject: [PATCH 3/6] plot(reset.x) --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 4 +-- R/sp.plot.R | 86 ++++++++++++++++++++++++++++++++++---------------- R/xxx.R | 1 + man/sp.plot.Rd | 10 +++--- 6 files changed, 70 insertions(+), 34 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 20d8b38..7d29530 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: respeciate Title: Speciation profiles for gases and aerosols Version: 0.2.7 -Date: 2023-12-10 +Date: 2023-12-16 Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for gases and particles. More details in Simon et al (2010) . diff --git a/NAMESPACE b/NAMESPACE index 7bdd909..895dcb7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -69,6 +69,7 @@ importFrom(graphics,rect) importFrom(graphics,text) importFrom(graphics,title) importFrom(lattice,barchart) +importFrom(lattice,panel.abline) importFrom(lattice,panel.barchart) importFrom(lattice,panel.grid) importFrom(lattice,panel.xyplot) diff --git a/NEWS.md b/NEWS.md index 5f4c7a7..dbb6a04 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,8 @@ # Version 0.0 - Release Notes * [0.2.7] - * released 2023-12-10 - * sp_plot_species update; species order forced + * released 2023-12-16 + * sp_plot_species update; species order forced; added reset.x * [0.2.6] * released 2023-12-01 diff --git a/R/sp.plot.R b/R/sp.plot.R index 339f001..10b4b70 100644 --- a/R/sp.plot.R +++ b/R/sp.plot.R @@ -1,6 +1,6 @@ #' @name sp.plot #' @title plotting (re)SPECIATE profiles -#' @aliases sp_plot_profile +#' @aliases sp_plot_profile sp_plot_species #' @description General plots for \code{respeciate} objects. @@ -8,10 +8,12 @@ #' (re)SPECIATE profile data sets. #' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) #' profiles. -#' @param id numeric, indices of profiles to use when -#' plotting (e.g. \code{id=1:6} plots first 6 profiles). -#' @param multi.profile character, how plot should handle -#' multiple profiles, e.g. 'group' or 'panel' (default +#' @param id numeric, the indices of profiles (or species) to use when +#' plotting with \code{sp_plot_profile} (or \code{sp_plot_species}). For +#' example \code{sp_plot_profile(x, id=1:6)} plots first 6 profiles in +#' \code{x}. +#' @param multi.profile character, how \code{sp_plot_profile} should +#' handle multiple profiles, e.g. 'group' or 'panel' (default #' group). #' @param order logical, order the species in the #' profile(s) by relative abundance before plotting. @@ -38,11 +40,15 @@ #JOBS ####################### -#reference lattice and latticeEXtra packages in documents... +#reference lattice, latticeEXtra and loa packages in documents... #all functions need work #see function fix, tidy, etc job notes in code +# dennis asked for data as part of return +# that is do-able but may need an object class + + #thinking about an sp_plot_compare(x, y) # to compare profile x and profile(s) y # started project (in own-notes) @@ -290,8 +296,8 @@ sp_plot_profile <- function(x, id, multi.profile = "group", #so lots of redundancy sp_plot_species <- function(x, id, multi.species = "group", - order=FALSE, log=FALSE, ..., - silent=FALSE){ + order = FALSE, log = FALSE, + ..., silent = FALSE){ #setup .x.args <- list(...) @@ -312,6 +318,10 @@ sp_plot_species <- function(x, id, multi.species = "group", #need to get species as character + ############################## + #if already factor ??? + # user could be forcing order + ############################## .sp.ord <- as.character(unique(x$SPECIES_ID)) #.sp.pro <- unique(x$PROFILE_CODE) #n/profile handling @@ -360,21 +370,34 @@ sp_plot_species <- function(x, id, multi.species = "group", } #################################### - #like sp_plot_profile... + #current species ordering by id arg... + #(see below about reordering) #################################### - species <- unique(x$SPECIES_NAME) + species <- species[species %in% unique(x$SPECIES_ID)] + x$SPECIES_ID <- factor(x$SPECIES_ID, levels=species) + x <- x[order(x$SPECIES_ID),] + x$SPECIES_NAME <- factor(x$SPECIES_NAME, unique(x$SPECIES_NAME)) + species<- levels(x$SPECIES_NAME) + sp.ord <- as.numeric(factor(species, levels=sort(species))) + + ################################## #should think about other naming options??? - x$SPECIES_NAME <- factor(x$SPECIES_NAME, levels=species) + ################################## + #print(species) + #print(sp.ord) #ignoring option to re-order at moment #order largest to smallest ############################# + #like to enable this #like to also be able to order by molecular weight + #need to decide handling if species is already a factor... ??? + #need to decide if this should work off species_id or species_name... ??? ############################## if(order){ ################################ - #bit of a cheat... + #taken from _profile plots ################################ test <- x test$PROFILE_CODE <- ".default" @@ -392,17 +415,15 @@ sp_plot_species <- function(x, id, multi.species = "group", } x <- x[c(".value","PROFILE_CODE", "PROFILE_NAME", "SPECIES_NAME")] - ################## - #profile bar chart + #species trend line plot ################## #dcast and melt to add in any missed entries as NAs - #(for the plot trail) - #not padding + #(to force trend line gaps) + #not padding, obviously not dropping nas... x <- sp_melt_wide(sp_dcast_species(x), pad=FALSE, drop.nas = FALSE) - ############################### #species handling ############################## @@ -424,20 +445,31 @@ sp_plot_species <- function(x, id, multi.species = "group", # convert to factor # but then by default lattice shows all factors labels... # format using a supplied function??? - x$x.id <- as.numeric(factor(x$PROFILE_CODE)) - ############################## - #species alignment + if("reset.x" %in% names(.x.args)){ + #initial test reset.x + # this is a function and it is applied to profile_code + # to build the x axis... + x$.x <- .x.args$reset.x(x$PROFILE_CODE) + .xlab <- "" + } else { + x$.x <- as.numeric(factor(x$PROFILE_CODE)) + .xlab <- "Sample [index]" + } + ############################## + #species alignment - p1.ls <- list(x= .value~x.id, - data=x, ylab="Measurement", xlab="Sample [index]", + p1.ls <- list(x= .value~.x, + data=x, ylab="Measurement", xlab=.xlab, type="l", #NB: prepanel seemed to break ylim when stacking panel = function(x, y, ...){ - rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), - panel.grid, ...) + at.x <- pretty(x) + at.y <- pretty(y) + rsp_panelPal("grid", list(h=at.y,v=at.x, col="grey", lty=3), + panel.abline, ...) panel.xyplot(x=x, y=y, ...) }, between=list(y=.2), @@ -454,10 +486,10 @@ sp_plot_species <- function(x, id, multi.species = "group", if(length(species)>1){ if(tolower(multi.species) %in% c("panel", "panels")){ #paneling multiple panels - p1.ls$x <- .value~x.id | SPECIES_NAME + p1.ls$x <- .value~.x | SPECIES_NAME } else { #grouping multiple panels - p1.ls$x <- .value~x.id + p1.ls$x <- .value~.x p1.ls$groups <- x$SPECIES_NAME } } @@ -506,7 +538,7 @@ sp_plot_species <- function(x, id, multi.species = "group", #title="Legends", lines=list(col=rep(p1.ls$col, length.out=length(species))), - text = list(species, cex=0.7)) + text = list(levels(x$SPECIES_NAME), cex=0.7)) p1.ls$key <- if("key" %in% names(p1.ls)){ modifyList(.tmp, p1.ls$key) } else { diff --git a/R/xxx.R b/R/xxx.R index 48b7c29..4623303 100644 --- a/R/xxx.R +++ b/R/xxx.R @@ -38,6 +38,7 @@ utils::globalVariables(c("sysdata", ".SD", "ans", "control", #' @importFrom lattice xyplot barchart panel.grid panel.xyplot panel.barchart #' trellis.par.get simpleTheme yscale.components.default prepanel.default.xyplot +#' panel.abline #' @importFrom latticeExtra doubleYScale panel.ablineq #' @importFrom data.table ":=" #' @importFrom stats sd cophenetic cor cutree dist hclust heatmap AIC diff --git a/man/sp.plot.Rd b/man/sp.plot.Rd index 7e8812b..a5c3c4d 100644 --- a/man/sp.plot.Rd +++ b/man/sp.plot.Rd @@ -30,11 +30,13 @@ sp_plot_species( \item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) profiles.} -\item{id}{numeric, indices of profiles to use when -plotting (e.g. \code{id=1:6} plots first 6 profiles).} +\item{id}{numeric, the indices of profiles (or species) to use when +plotting with \code{sp_plot_profile} (or \code{sp_plot_species}). For +example \code{sp_plot_profile(x, id=1:6)} plots first 6 profiles in +\code{x}.} -\item{multi.profile}{character, how plot should handle -multiple profiles, e.g. 'group' or 'panel' (default +\item{multi.profile}{character, how \code{sp_plot_profile} should +handle multiple profiles, e.g. 'group' or 'panel' (default group).} \item{order}{logical, order the species in the From ad83bbca51ece296a9a2651a724b5848d7f3508b Mon Sep 17 00:00:00 2001 From: karlropkins Date: Fri, 29 Dec 2023 17:04:04 +0000 Subject: [PATCH 4/6] sp_match update --- DESCRIPTION | 16 +- NAMESPACE | 1 + NEWS.md | 7 +- R/respeciate.generics.R | 442 ++++++++++++++++++++++++++++++++++++- R/sp.match.R | 79 ++++++- R/sp.plot.R | 85 +++++-- R/sp.pls.R | 3 + R/xxx.R | 3 +- man/respeciate.generics.Rd | 21 +- man/sp.match.Rd | 31 ++- man/sp.plot.Rd | 29 ++- 11 files changed, 662 insertions(+), 55 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7d29530..31d7a85 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,16 +1,16 @@ Package: respeciate Title: Speciation profiles for gases and aerosols Version: 0.2.7 -Date: 2023-12-16 -Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation - profiles for gases and particles. More details in Simon et al (2010) +Date: 2023-12-29 +Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles + for gases and particles. More details in Simon et al (2010) . Type: Package -Authors@R: c( person(given = "Sergio", family = "Ibarra-Espinosa", role = - c("aut", "cre"), email = "sergio.ibarra@usp.br", comment = c(ORCID = - "0000-0002-3162-1905")), person(given = "Karl", family = "Ropkins", - role = c("aut"), email = "k.ropkins@its.leeds.ac.uk", comment = c(ORCID - = "0000-0002-0294-6997")) ) +Authors@R: c( person(given = "Sergio", family = "Ibarra-Espinosa", role = c("aut", + "cre"), email = "sergio.ibarra@usp.br", comment = c(ORCID = + "0000-0002-3162-1905")), person(given = "Karl", family = "Ropkins", role = + c("aut"), email = "k.ropkins@its.leeds.ac.uk", comment = c(ORCID = + "0000-0002-0294-6997")) ) License: MIT + file LICENSE URL: https://github.com/atmoschem/respeciate BugReports: https://github.com/atmoschem/respeciate/issues diff --git a/NAMESPACE b/NAMESPACE index 895dcb7..f7507c6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,6 +53,7 @@ importFrom(grDevices,colorRampPalette) importFrom(grDevices,dev.flush) importFrom(grDevices,dev.hold) importFrom(grDevices,heat.colors) +importFrom(grDevices,rainbow) importFrom(graphics,abline) importFrom(graphics,axis) importFrom(graphics,barplot) diff --git a/NEWS.md b/NEWS.md index dbb6a04..145aeba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,15 +1,16 @@ # Version 0.0 - Release Notes * [0.2.7] - * released 2023-12-16 - * sp_plot_species update; species order forced; added reset.x + * released 2023-12-29 + * sp_plot_species update; species order forced, added reset.x + * sp_match_profile update; fit methods for pd and sid (Belis) * [0.2.6] * released 2023-12-01 * sp_build_rsp_x update; added further value handling * summary.respeciate update; removed output print * pls_rebuild update; option to add species as marker - * now importing lattice and latticeExtra for plot panelling + * now importing lattice and latticeExtra for plot paneling * pls_plots and plot.respeciate now using lattice/latticeExtra * added sp_plot_profile and sp_profile_species * reduced padding on log scale y-axis diff --git a/R/respeciate.generics.R b/R/respeciate.generics.R index 2327986..b78c530 100644 --- a/R/respeciate.generics.R +++ b/R/respeciate.generics.R @@ -15,16 +15,23 @@ #' also see note. #' @param x the \code{respeciate} #' object to be printed, plotted, etc. -#' @param n when printing a multi-profile object, the -#' maximum number of profiles to report. +#' @param n when plotting or printing a multi-profile object, the +#' maximum number of profiles to report. (When plotting, \code{n} +#' is ignored if \code{id} is also set.) #' @param ... any extra arguments, mostly ignored except by -#' \code{plot} which passes them to \code{\link{sp_plot_profile}}. +#' \code{plot} which passes them to \code{\link{barplot}}. #' @param object like \code{x} but for \code{summary}. +#' @param id numeric, indices of profiles to use when +#' plotting (\code{id=1:6} is equivalent to \code{n=6}). +#' @param order logical, order the species in the +#' profile(s) by relative abundance before plotting. #' @note \code{respeciate} objects revert to #' \code{data.frame}s when not doing anything #' package-specific, so you can still -#' use as previously with other data handling and -#' plotting packages. +#' use as previously with \code{lattice} or +#' \code{ggplot2}, useful if you are pulling multiple +#' profiles and you exceed the base \code{\link{barplot}} +#' capacity... #notes @@ -103,19 +110,440 @@ print.rsp_pls <- function(x, n = NULL, ...){ + #' @rdname respeciate.generics #' @method plot respeciate #' @export +########################## +#notes +########################## +#like.... +#better handling of factor axis labels +#better handling of axes and legend font sizes +# (I think previous code may have handled this a little better) +# (but not perfectly...) +#like horiz=total scales to be other way around? +# could also mean rethinking the legend position for this? + +############################ +#added warning/handling for +# duplicate species in profiles (handling merge/mean) +# duplicated profile names (handling make unique) + +#test is now set up to use data.table + + + plot.respeciate <- - function(x, ...){ - sp_plot_profile(x, ...) + function(x, n=NULL, id=NULL, order=TRUE, ...){ + + #add .value if not there + ## don't think .value works + x <- rsp_tidy_profile(x) + + ##test object type + test <- rsp_test_respeciate(x, level=2, silent=TRUE) + if(test != "respeciate"){ + if(test %in% c("respeciate.profile.ref", "respeciate.species.ref")){ + stop("No plot method for respeciate.reference files.") + } else { + stop("suspect respeciate object!") + } + #don't stop - respeciate profile + } + + ##test something to plot + if(nrow(x)==0){ + ###################### + #think about this + ###################### + #maybe stop() instead??? + #stop("empty respeciate object?") + return(invisible(NULL)) + } + + #hold extra args + # passing to plot + .xargs <- list(...) + + #test number of profiles + #and subset x, etc... + test <- unique(x$PROFILE_CODE) + if(is.null(n) & is.null(id)){ + id <- 1:length(test) + } else { + if(!is.null(n)){ + id <- 1:n + } + } + test <- test[id] + x <- x[x$PROFILE_CODE %in% test,] + #above will die if n-th profile not there + if(length(n)>6){ + warning(paste("\n\t", length(test), + " profiles (might be too many; suggest 6 or less...)", + "\n", sep="")) + } + + x <- rsp_test_profile(x) + + + if(any(x$.n>1)){ + warning(paste("\n\t", + " found duplicate species in profiles (merged and averaged...)", + "\n", sep="")) + } + x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME) + + #################################### + #issue profile names are not always unique + #################################### + test <- x + test$SPECIES_ID <- ".default" + test <- rsp_test_profile(test) + ################### + #rep_test + #can now replace this with data.table version + #BUT check naming conventions for .n + ################### + + #does this need a warning? + if(length(unique(test$PROFILE_NAME)) 6 warning not appearing !!! +#option to have col as a function ??? + +#decide what to do about stacking +#log / bad.log??? + +#say no to stack logs! + +#would like it to handle logs force origin to 0 for standard +# and minimum for logs ??? + +#strip label font size??? + +#key? to reorder the auto.key test and rectangles??? +# key=list(space="right",adj=0,title="Legends", +# points=list(pch=1, +# col=trellis.par.get("superpose.symbol")$col[1:length(labels)]), +# text=list(labels)) + +#plot types??? + +# + +#test +#my <- "C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\test\\uk.metals.aurn.2b.rds" +#my <- sp_build_rsp_x(readRDS(my)) +#rsp_plot(my) + + +######################### +#next +########################## + +#now very messy... +#what can we rationalise??? +#profile name shortening +#profile name to code option??? +#species name to species id option??? + +rsp_plot <- + function(x, id, order=TRUE, + log=FALSE, ...){ + + #setup + ################## + #add .value if not there + x <- rsp_tidy_profile(x) + #others refs + .x.args <- list(...) + .sp.ord <- unique(x$SPECIES_ID) + .sp.pro <- unique(x$PROFILE_NAME) + #n/profile handling + profile <- if (missing(id)) { + profile <- .sp.pro + } else { + id + } + if (is.numeric(profile)) { + if (all(profile == -1)) { + profile <- .sp.pro + } + else { + profile <- .sp.pro[profile] + } + } + if (!any(profile %in% .sp.pro) | any(is.na(profile))) { + stop("RSP> unknown profile(s) or missing ids, please check", call. = FALSE) + } + + if(length(profile)>8 & missing(id)){ + warning("RSP> ", length(profile), " profiles... ", + "plot foreshorten to 8 to reduce cluttering", + "\n\t (maybe use id to force larger range if sure)", + sep="", call.=FALSE) + profile <- profile[1:8] + } + x <- x[x$PROFILE_NAME %in% profile,] + + ##test object type + test <- rsp_test_respeciate(x, level=2, silent=TRUE) + if(test != "respeciate"){ + if(test %in% c("respeciate.profile.ref", "respeciate.species.ref")){ + stop("RSP> No plot method for respeciate.reference files.", + call. = FALSE) + } else { + stop("RSP> suspect respeciate object!", + call. = FALSE) + } + #don't stop - respeciate profile + } + + ##test something to plot + if(nrow(x)==0){ + ###################### + #think about this + ###################### + #maybe stop() instead??? + #stop("empty respeciate object?") + #maybe warning() aw well?? + return(invisible(NULL)) + } + + x <- rsp_test_profile(x) + + if(any(x$.n>1)){ + warning(paste("RSP> found duplicate species in profiles (merged and averaged...)", + sep=""), call.=FALSE) + } + x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME) + + #################################### + #issue profile names are not always unique + #################################### + test <- x + test$SPECIES_ID <- ".default" + test <- rsp_test_profile(test) + ################### + #rep_test + #can now replace this with data.table version + #BUT check naming conventions for .n + ################### + + #does this need a warning? + if(length(unique(test$PROFILE_NAME)) found profiles with common names (making unique...)", + sep=""), call. = FALSE) + test$PROFILE_NAME <- make.unique(test$PROFILE_NAME) + x <- x[names(x) != "PROFILE_NAME"] + x <- merge(x, test[c("PROFILE_NAME", "PROFILE_CODE")], by="PROFILE_CODE") + } + + #x$PROFILE_NAME <- make.unique(x$PROFILE_NAME) + #order largest to smallest + ############################# + #like to also be able to order by molecular weight + ############################## + if(order){ + ################################ + #bit of a cheat... + ################################ + test <- x + test$PROFILE_CODE <- ".default" + test <- rsp_test_profile(test) + #previous barplot had bedside + if("stack" %in% names(.x.args) && .x.args$stack){ + test <- test[order(test$.total, decreasing = TRUE),] + xx <- unique(test$SPECIES_NAME) + } else { + test <- x[order(x$WEIGHT_PERCENT, decreasing = TRUE),] + xx <- unique(test$SPECIES_NAME) + } + } else { + xx <- unique(x$SPECIES_NAME) + } + x <- x[c("WEIGHT_PERCENT", "PROFILE_NAME", "SPECIES_NAME")] + + x$SPECIES_NAME <- factor(x$SPECIES_NAME, + levels = xx) + + ################## + #profile bar chart + ################## + p1.ls <- list(x= WEIGHT_PERCENT~SPECIES_NAME, + data=x, ylab="Profile Loading", xlab="", + #NB: prepanel seemed to break ylim when stacking + panel = function(x, y, origin, ylim, ...){ + rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), + panel.grid, ...) + if(missing(origin)){ + origin <- if(min(y, na.rm=TRUE) < 0 ) { + min(y, na.rm=TRUE) - 0.02 + } else { + 0 + } + } + panel.barchart(x=x, y=y, origin=origin, ylim=ylim, ...) + }, + between=list(y=.2), + scales=list(x=list(rot=90, + cex=0.7, + alternating=1), + y=list(rot=c(0,90), + cex=0.7, + alternating=3, + relation="free")) + ) + #, + #auto.key=list(space="right", columns = 1, + # cex=0.7, + # points=FALSE, + # rectangles=TRUE)) + ################# + #this may need refining... + + ##################### + #this is involved... + + if("col" %in% names(.x.args)){ + if(is.function(.x.args$col)){ + .x.args$col <- .x.args$col(length(profile)) + } + } + + if(length(profile)>1){ + #panel or group profiles? + if("panel.profiles" %in% names(.x.args)){ + p1.ls$x <- WEIGHT_PERCENT~SPECIES_NAME | PROFILE_NAME + } else { + p1.ls$groups <- x$PROFILE_NAME + if(!"col" %in% names(p1.ls)){ + p1.ls$col <- rep(trellis.par.get("superpose.polygon")$col, + length.out=length(profile)) + } + } + } + + if(log){ + if("stack" %in% names(.x.args) && .x.args$stack){ + stop("RSP> sorry currently don't stack logs...", + call. = FALSE) + } + #previous + p1.ls$scales$y$log <- 10 + p1.ls$yscale.components <- rsp_yscale.component.log10 + } + p1.ls <- modifyList(p1.ls, .x.args) + if("groups" %in% names(p1.ls) & length(profile)>1){ + #add key... if auto.key not there + .tmp <- if("col" %in% names(p1.ls)){ + rep(p1.ls$col, length.out = length(profile)) + } else { + rep(trellis.par.get("superpose.polygon")$col, + length.out=length(profile)) + } + p1.ls$key <- list(space="right", + #title="Legends", + rectangles=list(col=.tmp), + text = list(profile, cex=0.7)) + } + if("key" %in% names(.x.args)){ + p1.ls$key <- modifyList(p1.ls$key, .x.args$key) + } + if("col" %in% names(p1.ls)){ + p1.ls$par.settings = list(superpose.polygon = list(col = p1.ls$col), + superpose.symbol = list(fill = p1.ls$col)) + } + p1 <- do.call(barchart, p1.ls) + return(p1) + } ################################## #summary diff --git a/R/sp.match.R b/R/sp.match.R index e71d9fa..476f86c 100644 --- a/R/sp.match.R +++ b/R/sp.match.R @@ -21,12 +21,28 @@ #' @param min.n \code{numeric} (default 8), the minimum number of paired #' species measurements in two profiles required for a match to be assessed. #' See also \code{\link{sp_species_cor}}. +#' @param method Character (default 'pd'), the similarity measure to use, current +#' options 'pd', the Pearson's Distance (1- Pearson's correlation coefficient), +#' or 'sid', the Standardized Identity Distance. #' @param test.x Logical (default FALSE). The match process self-tests by adding -#' \code{x} to \code{ref}, which should generate a perfect fit=1 score. Setting +#' \code{x} to \code{ref}, which should generate a perfect fit=0 score. Setting #' \code{test.x} to \code{TRUE} retains this as an extra record. #' @return \code{sp_match_profile} returns a fit report: a \code{data.frame} of #' up to \code{n} fit reports for the nearest matches to \code{x} from the #' reference profile data set, \code{ref}. +#' @references Distance metrics are based on recommendations by Belis et al (2015) +#' and as implemented in Mooibroek et al (2022): +#' +#' Belis, C.A., Pernigotti, D., Karagulian, F., Pirovano, G., Larsen, B.R., +#' Gerboles, M., Hopke, P.K., 2015. A new methodology to assess the performance +#' and uncertainty of source apportionment models in intercomparison +#' exercises. Atmospheric Environment, 119, 35–44. +#' https://doi.org/10.1016/j.atmosenv.2015.08.002. +#' +#' Mooibroek, D., Sofowote, U.M. and Hopke, P.K., 2022. Source apportionment of +#' ambient PM10 collected at three sites in an urban-industrial area with +#' multi-time resolution factor analyses. Science of The Total Environment, +#' 850, p.157981. http://dx.doi.org/10.1016/j.scitotenv.2022.157981. #NOTE @@ -99,7 +115,7 @@ # but might need to rethink n, min.bin, etc??? sp_match_profile <- function(x, ref, matches=10, rescale=5, - min.n=8, test.x=FALSE){ + min.n=8, method = "pd", test.x=FALSE){ ####################### #if ref missing @@ -228,13 +244,60 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, # compare this and code in sp_species_cor # if/when we deal with this stop message this code may need to be updated - f <- function(x) { - if(length(x[!is.na(x) & !is.na(.test)])>min.n){ - suppressWarnings(cor(x, .test, use ="pairwise.complete.obs")) - } else { - NA + ######################### + #method + ######################## + #to do + # check with dennis re SID negative handling + # think about adding a log.pd + # but three options would mean we need stricter method handling... + f <- FALSE + if(tolower(method)=="pd"){ + # method pd + f <- function(x) { + if(length(x[!is.na(x) & !is.na(.test)])>min.n){ + suppressWarnings(1-cor(x, .test, use ="pairwise.complete.obs")) + } else { + NA + } + } + } + if(tolower(method)=="log.pd"){ + # method log pd + # to think about + # drops if a lot are set to zero... + f <- function(x) { + if(length(x[!is.na(x) & !is.na(.test)])>min.n){ + suppressWarnings(1-cor(log10(x), log10(.test), + use ="pairwise.complete.obs")) + } else { + NA + } } } + if(tolower(method)=="sid"){ + # method SID + #################################### + #need to check this with dennis + #how are negatives handled?? + #################################### + f <- function(x) { + .ref <- !is.na(x) & !is.na(.test) + x <- x[.ref] + if(length(x)>min.n){ + .test <- as.vector(unlist(.test))[.ref] + abs((sqrt(2)/length(x))* (sum(((.test-x)/(.test+x)), na.rm = TRUE))) + #if(.ans < 0) NA else .ans + #.ans + } else{ + NA + } + } + } + if(!is.function(f)){ + stop("RSP> sp_match_profile 'method' unknown", call. = FALSE) + } + .out <- .tmp[, (.cols) := lapply(.SD, f), .SDcols = .cols] ########################## @@ -246,7 +309,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, # might be a better way of doing this??? .out <- as.data.frame(.out[1, -1:-2]) - .out <- sort(unlist(.out), decreasing = TRUE) + .out <- sort(unlist(.out), decreasing = FALSE) if(length(.out) > matches){ .out <- .out[1:matches] } diff --git a/R/sp.plot.R b/R/sp.plot.R index 10b4b70..d555fcf 100644 --- a/R/sp.plot.R +++ b/R/sp.plot.R @@ -5,13 +5,13 @@ #' @description General plots for \code{respeciate} objects. #' @description \code{sp_plot} functions generate plots for supplied -#' (re)SPECIATE profile data sets. +#' (re)SPECIATE data sets. #' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) #' profiles. -#' @param id numeric, the indices of profiles (or species) to use when -#' plotting with \code{sp_plot_profile} (or \code{sp_plot_species}). For -#' example \code{sp_plot_profile(x, id=1:6)} plots first 6 profiles in -#' \code{x}. +#' @param id numeric, the indices of profiles or species to use when +#' plotting with \code{sp_plot_profile} or \code{sp_plot_species}, +#' respectively. For example, \code{sp_plot_profile(x, id=1:6)} plots +#' first 6 profiles in \code{respeciate} object \code{x}. #' @param multi.profile character, how \code{sp_plot_profile} should #' handle multiple profiles, e.g. 'group' or 'panel' (default #' group). @@ -22,9 +22,25 @@ #' plotting functions. #' @param silent logical, hide warnings when generating plots (default #' \code{FALSE}) -#' @param multi.species, like \code{multi.profile} but for species. +#' @param multi.species, character, like \code{multi.profile} in +#' \code{sp_plot_profile} but for species in \code{sp_plot_species}. #' @return \code{sp_plot} graph, plot, etc usually as a trellis object. #' @note These functions are currently in development, so may change. +#' @references Most \code{respeciate} plots make extensive use of +#' \code{lattice} and \code{latticeExtra} code: +#' +#' Sarkar D (2008). \emph{Lattice: Multivariate Data Visualization with R}. +#' Springer, New York. ISBN 978-0-387-75968-5, \url{http://lmdvr.r-forge.r-project.org}. +#' +#' Sarkar D, Andrews F (2022). \emph{latticeExtra: Extra Graphical Utilities Based +#' on Lattice}. R package version 0.6-30, +#' \url{https://CRAN.R-project.org/package=latticeExtra}. +#' +#' They also incorporate ideas from \code{loa}: +#' +#' Ropkins K (2023). \emph{loa: various plots, options and add-ins for use with lattice}. +#' R package version 0.2.48.3, \url{https://CRAN.R-project.org/package=loa}. + #functions # sp_plot_profile @@ -32,27 +48,51 @@ # plot.respeciate is wrapper for sp_plot_profile -#use unexported -# rsp_plot_fix +#uses unexported code +# rsp_plot_fix +# rsp_yscale.component.log10 (currently in sp.pls.r) + #JOBS ####################### -#reference lattice, latticeEXtra and loa packages in documents... +#references may need formatting tidying +# currently these are lattice, latticeEXtra and loa... +# check roxygen2 guidance ??? #all functions need work #see function fix, tidy, etc job notes in code +# ALL need better colour handling for large numbers of cases +# typically group handling... +# maybe a variation on col=rainbow ??? + +#examples +# maybe +# sp_plot_profile(spq_pm.ae8()) +# (allows most lattice style plot control, etc key=list(...)) +# (but includes some short cuts to common handling, e.g. log=T to +# log y scales and reformat y axes) +# sp_plot_profile(spq_pm.ae8(), key=list(space="top", columns=2), log=T) + +#color defaults... +#issue current default wraps if you exceed number of cols in default set. +#from: https://stackoverflow.com/questions/26314701/r-reducing-colour-saturation-of-a-colour-palette +#function(x) colorRampPalette(rainbow(12, s = 0.5, v = 1)[2:11],interpolate = "spline")(x) +## ?? could extrapolate the default colors using something like above ??? + + # dennis asked for data as part of return # that is do-able but may need an object class - +# (maybe like the openair code...) #thinking about an sp_plot_compare(x, y) # to compare profile x and profile(s) y # started project (in own-notes) + ################################### #sp_plot_profile ################################### @@ -77,6 +117,14 @@ #see in code notes about jobs +############################## +#testing +# reset.x as option to change +# x access handing +# wondering about a general fix +# upfront so applied to x (rsp data.frame) +# what is x, how is it formatted, etc +# then same for y, groups and cond... sp_plot_profile <- function(x, id, multi.profile = "group", order=TRUE, log=FALSE, ..., @@ -292,7 +340,7 @@ sp_plot_profile <- function(x, id, multi.profile = "group", #in development -#taken straight from sp_plot_profile +#lot taken straight from sp_plot_profile #so lots of redundancy sp_plot_species <- function(x, id, multi.species = "group", @@ -446,6 +494,10 @@ sp_plot_species <- function(x, id, multi.species = "group", # but then by default lattice shows all factors labels... # format using a supplied function??? + ############################ + #could move this top and apply + #before plotting?? + if("reset.x" %in% names(.x.args)){ #initial test reset.x # this is a function and it is applied to profile_code @@ -517,8 +569,15 @@ sp_plot_species <- function(x, id, multi.species = "group", } } else { p1.ls$col <- if("groups" %in% names(p1.ls)){ - rep(trellis.par.get("superpose.line")$col, - length.out=length(species)) + colorRampPalette(rainbow(12, s = 0.5, v = 1), + interpolate = "spline")(length(species)) + #or: + #colorRampPalette(rainbow(12, s = 0.5, v = 1),interpolate = "spline")(x) + #was: + #colorRampPalette(trellis.par.get("superpose.line")$col, + # interpolate = "spline")(length(species)) + #rep(trellis.par.get("superpose.line")$col, + # length.out=length(species)) } else { trellis.par.get("superpose.line")$col[1] } diff --git a/R/sp.pls.R b/R/sp.pls.R index 581fc9c..31a1c17 100644 --- a/R/sp.pls.R +++ b/R/sp.pls.R @@ -1738,6 +1738,9 @@ rsp_profile_code_order <- function(data){ #log axis hander #based on lattice text book method +#issues?? +# could be problem with y padding when log=T and .value range is wide... + rsp_yscale.component.log10 <- function(lim, ...) { ans <- yscale.components.default(lim = lim, ...) tick.at <- pretty(lim) diff --git a/R/xxx.R b/R/xxx.R index 4623303..72d73e1 100644 --- a/R/xxx.R +++ b/R/xxx.R @@ -47,8 +47,7 @@ utils::globalVariables(c("sysdata", ".SD", "ans", "control", #' @importFrom graphics axis barplot par legend lines rect text abline #' grid mtext plot.new plot.window points polygon title #' @importFrom grDevices cm.colors colorRampPalette as.graphicsAnnot -#' dev.flush dev.hold heat.colors - +#' dev.flush dev.hold heat.colors rainbow #might be able to drop legend? diff --git a/man/respeciate.generics.Rd b/man/respeciate.generics.Rd index 0e92b2d..078bdb2 100644 --- a/man/respeciate.generics.Rd +++ b/man/respeciate.generics.Rd @@ -12,7 +12,7 @@ \method{print}{rsp_pls}(x, n = NULL, ...) -\method{plot}{respeciate}(x, ...) +\method{plot}{respeciate}(x, n = NULL, id = NULL, order = TRUE, ...) \method{summary}{respeciate}(object, ...) } @@ -20,11 +20,18 @@ \item{x}{the \code{respeciate} object to be printed, plotted, etc.} -\item{n}{when printing a multi-profile object, the -maximum number of profiles to report.} +\item{n}{when plotting or printing a multi-profile object, the +maximum number of profiles to report. (When plotting, \code{n} +is ignored if \code{id} is also set.)} \item{...}{any extra arguments, mostly ignored except by -\code{plot} which passes them to \code{\link{sp_plot_profile}}.} +\code{plot} which passes them to \code{\link{barplot}}.} + +\item{id}{numeric, indices of profiles to use when +plotting (\code{id=1:6} is equivalent to \code{n=6}).} + +\item{order}{logical, order the species in the +profile(s) by relative abundance before plotting.} \item{object}{like \code{x} but for \code{summary}.} } @@ -43,6 +50,8 @@ also see note. \code{respeciate} objects revert to \code{data.frame}s when not doing anything package-specific, so you can still -use as previously with other data handling and -plotting packages. +use as previously with \code{lattice} or +\code{ggplot2}, useful if you are pulling multiple +profiles and you exceed the base \code{\link{barplot}} +capacity... } diff --git a/man/sp.match.Rd b/man/sp.match.Rd index d742325..1b2584d 100644 --- a/man/sp.match.Rd +++ b/man/sp.match.Rd @@ -5,7 +5,15 @@ \alias{sp_match_profile} \title{Find nearest matches from reference set of profiles} \usage{ -sp_match_profile(x, ref, matches = 10, rescale = 5, min.n = 8, test.x = FALSE) +sp_match_profile( + x, + ref, + matches = 10, + rescale = 5, + min.n = 8, + method = "pd", + test.x = FALSE +) } \arguments{ \item{x}{A \code{respeciate} object or similar \code{data.frame} containing @@ -28,8 +36,12 @@ comparing \code{x} and profiles in \code{ref}: options 0 to 5 handled by species measurements in two profiles required for a match to be assessed. See also \code{\link{sp_species_cor}}.} +\item{method}{Character (default 'pd'), the similarity measure to use, current +options 'pd', the Pearson's Distance (1- Pearson's correlation coefficient), +or 'sid', the Standardized Identity Distance.} + \item{test.x}{Logical (default FALSE). The match process self-tests by adding -\code{x} to \code{ref}, which should generate a perfect fit=1 score. Setting +\code{x} to \code{ref}, which should generate a perfect fit=0 score. Setting \code{test.x} to \code{TRUE} retains this as an extra record.} } \value{ @@ -43,3 +55,18 @@ reference profile data set, \code{ref}. attempt to identify nearest matches on the basis of correlation coefficient. } +\references{ +Distance metrics are based on recommendations by Belis et al (2015) +and as implemented in Mooibroek et al (2022): + +Belis, C.A., Pernigotti, D., Karagulian, F., Pirovano, G., Larsen, B.R., +Gerboles, M., Hopke, P.K., 2015. A new methodology to assess the performance +and uncertainty of source apportionment models in intercomparison +exercises. Atmospheric Environment, 119, 35–44. +https://doi.org/10.1016/j.atmosenv.2015.08.002. + +Mooibroek, D., Sofowote, U.M. and Hopke, P.K., 2022. Source apportionment of +ambient PM10 collected at three sites in an urban-industrial area with +multi-time resolution factor analyses. Science of The Total Environment, +850, p.157981. http://dx.doi.org/10.1016/j.scitotenv.2022.157981. +} diff --git a/man/sp.plot.Rd b/man/sp.plot.Rd index a5c3c4d..dfcecd3 100644 --- a/man/sp.plot.Rd +++ b/man/sp.plot.Rd @@ -30,10 +30,10 @@ sp_plot_species( \item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) profiles.} -\item{id}{numeric, the indices of profiles (or species) to use when -plotting with \code{sp_plot_profile} (or \code{sp_plot_species}). For -example \code{sp_plot_profile(x, id=1:6)} plots first 6 profiles in -\code{x}.} +\item{id}{numeric, the indices of profiles or species to use when +plotting with \code{sp_plot_profile} or \code{sp_plot_species}, +respectively. For example, \code{sp_plot_profile(x, id=1:6)} plots +first 6 profiles in \code{respeciate} object \code{x}.} \item{multi.profile}{character, how \code{sp_plot_profile} should handle multiple profiles, e.g. 'group' or 'panel' (default @@ -50,7 +50,8 @@ plotting functions.} \item{silent}{logical, hide warnings when generating plots (default \code{FALSE})} -\item{multi.species, }{like \code{multi.profile} but for species.} +\item{multi.species, }{character, like \code{multi.profile} in +\code{sp_plot_profile} but for species in \code{sp_plot_species}.} } \value{ \code{sp_plot} graph, plot, etc usually as a trellis object. @@ -59,8 +60,24 @@ plotting functions.} General plots for \code{respeciate} objects. \code{sp_plot} functions generate plots for supplied -(re)SPECIATE profile data sets. +(re)SPECIATE data sets. } \note{ These functions are currently in development, so may change. } +\references{ +Most \code{respeciate} plots make extensive use of +\code{lattice} and \code{latticeExtra} code: + +Sarkar D (2008). \emph{Lattice: Multivariate Data Visualization with R}. +Springer, New York. ISBN 978-0-387-75968-5, \url{http://lmdvr.r-forge.r-project.org}. + +Sarkar D, Andrews F (2022). \emph{latticeExtra: Extra Graphical Utilities Based +on Lattice}. R package version 0.6-30, +\url{https://CRAN.R-project.org/package=latticeExtra}. + +They also incorporate ideas from \code{loa}: + +Ropkins K (2023). \emph{loa: various plots, options and add-ins for use with lattice}. +R package version 0.2.48.3, \url{https://CRAN.R-project.org/package=loa}. +} From c599d52cb9a9d34f1bbb1050a7e6f00e71f96403 Mon Sep 17 00:00:00 2001 From: karlropkins Date: Sun, 31 Dec 2023 16:11:57 +0000 Subject: [PATCH 5/6] plot.respeciate (sp_plot_profile) --- R/respeciate.generics.R | 21 +++++++++++---------- R/sp.match.R | 23 ++++++++++++++++++++--- R/xxx.R | 12 ++++++++++++ man/respeciate.generics.Rd | 17 +++++------------ 4 files changed, 48 insertions(+), 25 deletions(-) diff --git a/R/respeciate.generics.R b/R/respeciate.generics.R index b78c530..67f4b22 100644 --- a/R/respeciate.generics.R +++ b/R/respeciate.generics.R @@ -11,26 +11,21 @@ #' object or similar, \code{\link{print}} manages its appearance. #' @description When supplied a \code{respeciate} #' object, \code{\link{plot}} provides a basic plot -#' output. This uses base function \code{\link{barplot}}; +#' output. This uses base function \code{\link{barchart}}; #' also see note. #' @param x the \code{respeciate} #' object to be printed, plotted, etc. #' @param n when plotting or printing a multi-profile object, the -#' maximum number of profiles to report. (When plotting, \code{n} -#' is ignored if \code{id} is also set.) +#' maximum number of profiles to report. #' @param ... any extra arguments, mostly ignored except by -#' \code{plot} which passes them to \code{\link{barplot}}. +#' \code{plot} which passes them to \code{\link{sp_plot_profile}}. #' @param object like \code{x} but for \code{summary}. -#' @param id numeric, indices of profiles to use when -#' plotting (\code{id=1:6} is equivalent to \code{n=6}). -#' @param order logical, order the species in the -#' profile(s) by relative abundance before plotting. #' @note \code{respeciate} objects revert to #' \code{data.frame}s when not doing anything #' package-specific, so you can still #' use as previously with \code{lattice} or #' \code{ggplot2}, useful if you are pulling multiple -#' profiles and you exceed the base \code{\link{barplot}} +#' profiles and you exceed the base \code{\link{barchart}} #' capacity... @@ -133,9 +128,15 @@ print.rsp_pls <- function(x, n = NULL, ...){ #test is now set up to use data.table +#this is now sp_plot_profile + +plot.respeciate <- function(x, ...){ + sp_plot_profile(x, ...) +} + -plot.respeciate <- +rsp_plot.respeciate.old <- function(x, n=NULL, id=NULL, order=TRUE, ...){ #add .value if not there diff --git a/R/sp.match.R b/R/sp.match.R index 476f86c..e767f5e 100644 --- a/R/sp.match.R +++ b/R/sp.match.R @@ -183,7 +183,8 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, # but then maybe need to check requires # cols are there??? - .tmp <- data.table::as.data.table(sp_rescale_species(.tmp, method=rescale)) + #.tmp <- data.table::as.data.table(sp_rescale_species(.tmp, method=rescale)) + .tmp <- data.table::as.data.table(sp_rescale_profile(.tmp, method=rescale)) ################### #keep species names and ids for renaming @@ -286,14 +287,30 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, x <- x[.ref] if(length(x)>min.n){ .test <- as.vector(unlist(.test))[.ref] - abs((sqrt(2)/length(x))* (sum(((.test-x)/(.test+x)), na.rm = TRUE))) - #if(.ans < 0) NA else .ans + .ans <- (sqrt(2)/length(x))* (sum(((.test-x)/(.test+x)), na.rm = TRUE)) + if(.ans < 0) NA else .ans #.ans } else{ NA } } } + if(tolower(method)=="sid.2"){ + # method SID + #################################### + # based on reading I think this is closer?? + #################################### + f <- function(x) { + .ref <- !is.na(x) & !is.na(.test) + x <- x[.ref] + if(length(x)>min.n){ + .test <- as.vector(unlist(.test))[.ref] + mean(abs(x-.test)/.test, na.rm=TRUE) + } else{ + NA + } + } + } if(!is.function(f)){ stop("RSP> sp_match_profile 'method' unknown", call. = FALSE) } diff --git a/R/xxx.R b/R/xxx.R index 72d73e1..66295d7 100644 --- a/R/xxx.R +++ b/R/xxx.R @@ -5,6 +5,18 @@ #currently no hooks, etc... + +##################### +# to think about +##################### + +# standardise error messages, e.g. RSP> [function]: [issue] \n\t [fix]? + +# make respeciate object argument rsp rather than x +# that helps sp_plot..() but maybe not plot() + + + ##################### #to check ##################### diff --git a/man/respeciate.generics.Rd b/man/respeciate.generics.Rd index 078bdb2..cd90be2 100644 --- a/man/respeciate.generics.Rd +++ b/man/respeciate.generics.Rd @@ -12,7 +12,7 @@ \method{print}{rsp_pls}(x, n = NULL, ...) -\method{plot}{respeciate}(x, n = NULL, id = NULL, order = TRUE, ...) +\method{plot}{respeciate}(x, ...) \method{summary}{respeciate}(object, ...) } @@ -21,17 +21,10 @@ object to be printed, plotted, etc.} \item{n}{when plotting or printing a multi-profile object, the -maximum number of profiles to report. (When plotting, \code{n} -is ignored if \code{id} is also set.)} +maximum number of profiles to report.} \item{...}{any extra arguments, mostly ignored except by -\code{plot} which passes them to \code{\link{barplot}}.} - -\item{id}{numeric, indices of profiles to use when -plotting (\code{id=1:6} is equivalent to \code{n=6}).} - -\item{order}{logical, order the species in the -profile(s) by relative abundance before plotting.} +\code{plot} which passes them to \code{\link{sp_plot_profile}}.} \item{object}{like \code{x} but for \code{summary}.} } @@ -43,7 +36,7 @@ object or similar, \code{\link{print}} manages its appearance. When supplied a \code{respeciate} object, \code{\link{plot}} provides a basic plot -output. This uses base function \code{\link{barplot}}; +output. This uses base function \code{\link{barchart}}; also see note. } \note{ @@ -52,6 +45,6 @@ also see note. package-specific, so you can still use as previously with \code{lattice} or \code{ggplot2}, useful if you are pulling multiple -profiles and you exceed the base \code{\link{barplot}} +profiles and you exceed the base \code{\link{barchart}} capacity... } From 9d0bbac7a8986ac290e3f03fb8bf3e8ba2b03f08 Mon Sep 17 00:00:00 2001 From: karlropkins Date: Sat, 17 Feb 2024 16:37:21 +0000 Subject: [PATCH 6/6] matching by sid --- .Rproj.user/shared/notebooks/paths | 1 + DESCRIPTION | 16 ++++------ NEWS.md | 6 ++-- R/respeciate.generics.R | 6 ++++ R/sp.average.R | 25 +++++++++++++++ R/sp.match.R | 51 ++++++++++++++++++++++++++++-- R/sp.pad.R | 9 ++++++ R/sp.pls.R | 2 +- R/sp.reshape.R | 2 +- man/sp.match.Rd | 2 +- 10 files changed, 104 insertions(+), 16 deletions(-) diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index e06616b..aa92c1b 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -3,6 +3,7 @@ C:/Users/trakradmin/OneDrive - University of Leeds/Documents/ITS/projects/NERC_T C:/Users/trakradmin/OneDrive - University of Leeds/Documents/_isolateContribution&breakPointAnalysis_KR_20230824.R="F18B98A3" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/_paper_01_IntroToRespeciate/MS Access Versions/speciate_5.2_0/test.R="FCE2E494" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/marylebone03/_marylebone_analysis_pls_01.Rmd="F2B723A3" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/marylebone03/_marylebone_initial_observations_01.Rmd="E72195E5" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/marylebone03/_marylebone_metals_03.Rmd="D2C38DFE" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/match01/_match_notes_01.Rmd="A446C96C" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/.Rbuildignore="BAFF788D" diff --git a/DESCRIPTION b/DESCRIPTION index 31d7a85..8c3d69a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,16 +1,14 @@ Package: respeciate Title: Speciation profiles for gases and aerosols Version: 0.2.7 -Date: 2023-12-29 -Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles - for gases and particles. More details in Simon et al (2010) - . +Date: 2024-02-17 +Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for + gases and particles. More details in Simon et al (2010) . Type: Package -Authors@R: c( person(given = "Sergio", family = "Ibarra-Espinosa", role = c("aut", - "cre"), email = "sergio.ibarra@usp.br", comment = c(ORCID = - "0000-0002-3162-1905")), person(given = "Karl", family = "Ropkins", role = - c("aut"), email = "k.ropkins@its.leeds.ac.uk", comment = c(ORCID = - "0000-0002-0294-6997")) ) +Authors@R: c( person(given = "Sergio", family = "Ibarra-Espinosa", role = c("aut", "cre"), + email = "sergio.ibarra@usp.br", comment = c(ORCID = "0000-0002-3162-1905")), + person(given = "Karl", family = "Ropkins", role = c("aut"), email = + "k.ropkins@its.leeds.ac.uk", comment = c(ORCID = "0000-0002-0294-6997")) ) License: MIT + file LICENSE URL: https://github.com/atmoschem/respeciate BugReports: https://github.com/atmoschem/respeciate/issues diff --git a/NEWS.md b/NEWS.md index 145aeba..fcd9fba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,11 @@ # Version 0.0 - Release Notes * [0.2.7] - * released 2023-12-29 + * released 2024-02-17 * sp_plot_species update; species order forced, added reset.x - * sp_match_profile update; fit methods for pd and sid (Belis) + * sp_match_profile update; fit methods for pd and sid (Belis) + * sp_match_profile update; added sid variations + * pls_plot patch; forcing profile order * [0.2.6] * released 2023-12-01 diff --git a/R/respeciate.generics.R b/R/respeciate.generics.R index 67f4b22..ba83e0f 100644 --- a/R/respeciate.generics.R +++ b/R/respeciate.generics.R @@ -136,6 +136,12 @@ plot.respeciate <- function(x, ...){ +######################### +#to do +######################### + +#check below and then remove??? + rsp_plot.respeciate.old <- function(x, n=NULL, id=NULL, order=TRUE, ...){ diff --git a/R/sp.average.R b/R/sp.average.R index a7817a8..8e68d55 100644 --- a/R/sp.average.R +++ b/R/sp.average.R @@ -143,3 +143,28 @@ sp_average_profile <- function(x, code = NULL, name = NULL, method = 1, } + + +##################################### +#sp_species_calc +##################################### + +sp_species_calc <- function(x, calc = NULL, + id = NULL, name = NULL, + ...){ + #x is an rsp object + #calc is the calculation to apply to species in x + + .temp <- x + #test we can use this..? + print(calc) + .temp <- sp_dcast_species(.temp) + if(length(grep("=", calc)) > 0){ + print("is equals") + } else { + print("no equals") + } + + #out + return(NULL) +} diff --git a/R/sp.match.R b/R/sp.match.R index e767f5e..d463994 100644 --- a/R/sp.match.R +++ b/R/sp.match.R @@ -23,7 +23,7 @@ #' See also \code{\link{sp_species_cor}}. #' @param method Character (default 'pd'), the similarity measure to use, current #' options 'pd', the Pearson's Distance (1- Pearson's correlation coefficient), -#' or 'sid', the Standardized Identity Distance. +#' or 'sid', the Standardized Identity Distance (See References). #' @param test.x Logical (default FALSE). The match process self-tests by adding #' \code{x} to \code{ref}, which should generate a perfect fit=0 score. Setting #' \code{test.x} to \code{TRUE} retains this as an extra record. @@ -276,7 +276,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, } } } - if(tolower(method)=="sid"){ + if(tolower(method)=="sid.1"){ # method SID #################################### #need to check this with dennis @@ -311,6 +311,53 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, } } } + if(tolower(method)=="sid.3"){ + # method SID + #################################### + # based on reading I think this is closer?? + #################################### + f <- function(x) { + .ref <- !is.na(x) & !is.na(.test) & x!=0 & .test!=0 + x <- x[.ref] + if(length(x)>min.n){ + .test <- as.vector(unlist(.test))[.ref] + #rescale x and ref may be different + temp <- .test/x + temp <- temp[is.finite(temp)] + temp <- mean(temp, na.rm=TRUE) + x <- x * temp + ans <- mean(abs(x-.test)/.test, na.rm=TRUE) + #rounding issue somewhere... or jitter... ??? + round(ans, digits=10) + } else{ + NA + } + } + } + + + if(tolower(method)=="sid"){ + # method SID + #################################### + # based on reading I think this is closer?? + #################################### + f <- function(x) { + .ref <- !is.na(x) & !is.na(.test) & x!=0 & .test!=0 + x <- x[.ref] + if(length(x)>min.n){ + .test <- as.vector(unlist(.test))[.ref] + #rescale x and ref may be different + mod <- lm(.test~0+x, weights=1/x) + x <- predict(mod) + ans <- mean(abs(x-.test)/.test, na.rm=TRUE) + #rounding issue somewhere... or jitter... ??? + round(ans, digits=10) + } else{ + NA + } + } + } + if(!is.function(f)){ stop("RSP> sp_match_profile 'method' unknown", call. = FALSE) } diff --git a/R/sp.pad.R b/R/sp.pad.R index 4f87a40..97aeb84 100644 --- a/R/sp.pad.R +++ b/R/sp.pad.R @@ -106,6 +106,7 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ if(any(c("species", "standard", "all") %in% tolower(pad))){ SPECIES_PROPERTIES <- data.table::as.data.table(sysdata$SPECIES_PROPERTIES) .tmp <- intersect(names(out), names(SPECIES_PROPERTIES)) + print(.tmp) if(length(.tmp) >0){ out <- merge(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE, all.x=TRUE, allow.cartesian=TRUE) @@ -122,6 +123,8 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ } } + #return(out) + #references if(any(c("reference", "references", "all") %in% tolower(pad))){ PROFILE_REFERENCE <- data.table::as.data.table(sysdata$PROFILE_REFERENCE) @@ -138,6 +141,12 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ } } + ################################################# + #need to think about this + #weight_percent not there if up don't pad weights + # or profiles and will be NA for anything not + # in the SPECIATE archive + ################################################# #drop.nas. if(drop.nas){ out <- out[!is.na(out$WEIGHT_PERCENT),] diff --git a/R/sp.pls.R b/R/sp.pls.R index 31a1c17..af012a5 100644 --- a/R/sp.pls.R +++ b/R/sp.pls.R @@ -1731,7 +1731,7 @@ rsp_profile_code_order <- function(data){ .tmp <- data.table::as.data.table(data)[, .(ans=length(unique(PROFILE_CODE))),by="SPECIES_NAME"] .tmp <- subset(.tmp, ans == max(.tmp$ans, na.rm=TRUE))$SPECIES_NAME .tmp <- subset(data, SPECIES_NAME %in% .tmp) - unique(.tmp$PROFILE_CODE) + sort(unique(.tmp$PROFILE_CODE)) } diff --git a/R/sp.reshape.R b/R/sp.reshape.R index 309f7a2..490f4d3 100644 --- a/R/sp.reshape.R +++ b/R/sp.reshape.R @@ -243,7 +243,7 @@ sp_melt_wide <- function(x, pad = TRUE, drop.nas = TRUE){ } if(is.character(pad)){ - out <- sp_pad(out) + out <- sp_pad(out, pad) # PROFILES <- as.data.table(sysdata$PROFILES) # SPECIES_PROPERTIES <- as.data.table(sysdata$SPECIES_PROPERTIES) diff --git a/man/sp.match.Rd b/man/sp.match.Rd index 1b2584d..b8b7fe8 100644 --- a/man/sp.match.Rd +++ b/man/sp.match.Rd @@ -38,7 +38,7 @@ See also \code{\link{sp_species_cor}}.} \item{method}{Character (default 'pd'), the similarity measure to use, current options 'pd', the Pearson's Distance (1- Pearson's correlation coefficient), -or 'sid', the Standardized Identity Distance.} +or 'sid', the Standardized Identity Distance (See References).} \item{test.x}{Logical (default FALSE). The match process self-tests by adding \code{x} to \code{ref}, which should generate a perfect fit=0 score. Setting