diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
index c6ac331..db5a06c 100644
--- a/.Rproj.user/shared/notebooks/paths
+++ b/.Rproj.user/shared/notebooks/paths
@@ -1,7 +1,32 @@
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/ITS/projects/NERC_TRANSISTION/events/Clean Air Networks Conference/naei estimate.Rmd="1BB48545"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/_isolateContribution&breakPointAnalysis_KR_20230824.R="F18B98A3"
 C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/_paper_01_IntroToRespeciate/MS Access Versions/speciate_5.2_0/test.R="FCE2E494"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/marylebone02/_marylebone_metals_03.Rmd="63CDB89D"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/match01/_match_notes_01.Rmd="A446C96C"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/.Rbuildignore="BAFF788D"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/.Rproj.user/shared/notebooks/paths="DB89FCB7"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/.github/workflows/R-CMD-check.yml="33EF2D32"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/.gitignore="2CB231D9"
 C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/DESCRIPTION="8BA937B7"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/NAMESPACE="DBDDE80C"
 C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/NEWS.md="F6ED8BF4"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/respeciate-package.R="A43C9569"
 C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/respeciate.generics.R="54ECE8F1"
-C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.001.R="0357CDCE"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.R="58F2A9BE"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.average.R="0A1E36E4"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.cluster.R="49C0F861"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.cor.R="3F1DA8E5"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.info.R="D0E7AC03"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.match.R="4326C1C3"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.pad.R="4962C940"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.pls.R="EACFE9A4"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.rescale.R="D668C5B2"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.reshape.R="CC655C60"
 C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/speciate.R="6C6E673A"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/spq.R="3AD9ACC8"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/spx.R="CA18044A"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sysdata.R="82103C52"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/xxx.R="3415FF44"
 C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/README.Rmd="887EDA27"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/README.md="D46A00DB"
+C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/man/spec.Rd="8472593F"
diff --git a/DESCRIPTION b/DESCRIPTION
index 66551aa..97131a5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: respeciate
 Title: Speciation profiles for gases and aerosols
-Version: 0.2.1
-Date: 2023-06-21
-Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation for gases and particles.
-           More details in Simon et al (2010) <doi:10.5094/APR.2010.026>.
+Version: 0.2.3
+Date: 2023-09-23
+Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for
+          gases and particles. More details in Simon et al (2010) <doi:10.5094/APR.2010.026>.
 Type: Package
-Authors@R: c( person(given = "Sergio", family = "Ibarra-Espinosa", role = c("aut", "cre"), email =
-           "sergio.ibarra@usp.br", comment = c(ORCID = "0000-0002-3162-1905")), person(given = "Karl",
-           family = "Ropkins", role = c("aut"), email = "k.ropkins@its.leeds.ac.uk", comment = c(ORCID
-           = "0000-0002-0294-6997")) )
+Authors@R: c( person(given = "Sergio", family = "Ibarra-Espinosa", role = c("aut", "cre"),
+          email = "sergio.ibarra@usp.br", comment = c(ORCID = "0000-0002-3162-1905")),
+          person(given = "Karl", family = "Ropkins", role = c("aut"), email =
+          "k.ropkins@its.leeds.ac.uk", comment = c(ORCID = "0000-0002-0294-6997")) )
 License: MIT + file LICENSE
 URL: https://github.com/atmoschem/respeciate
 BugReports: https://github.com/atmoschem/respeciate/issues
@@ -16,3 +16,4 @@ LazyData: yes
 Depends: R (>= 3.5.0)
 RoxygenNote: 7.2.3
 Encoding: UTF-8
+Imports: data.table
diff --git a/NAMESPACE b/NAMESPACE
index b640aa1..cb9c27a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,13 +2,80 @@
 
 S3method(plot,respeciate)
 S3method(print,respeciate)
-S3method(print,respeciate.ref)
-S3method(print,respeciate.spcs)
+S3method(summary,respeciate)
 export(find_code)
+export(pls_fit_parent)
+export(pls_plot)
+export(pls_plot_profile)
+export(pls_plot_species)
+export(pls_refit_species)
+export(pls_report)
+export(sp_average_profile)
+export(sp_dcast)
+export(sp_dcast_profile)
+export(sp_dcast_species)
 export(sp_find_profile)
 export(sp_find_species)
+export(sp_info)
+export(sp_match_profile)
+export(sp_melt_wide)
+export(sp_pad)
+export(sp_pls_profile)
 export(sp_profile)
+export(sp_profile_distance)
+export(sp_profile_info)
+export(sp_rescale)
+export(sp_rescale_profile)
+export(sp_rescale_species)
+export(sp_species_cor)
+export(sp_species_info)
 export(spec)
+export(spq_gas)
+export(spq_other)
+export(spq_pm)
+export(spq_pm.ae6)
+export(spq_pm.ae8)
+export(spq_pm.cr1)
+export(spq_pm.simplified)
+export(spx_btex)
+export(spx_copy)
+export(spx_n_alkane)
+import(data.table)
+importFrom(grDevices,as.graphicsAnnot)
+importFrom(grDevices,cm.colors)
+importFrom(grDevices,colorRampPalette)
+importFrom(grDevices,dev.flush)
+importFrom(grDevices,dev.hold)
+importFrom(grDevices,heat.colors)
+importFrom(graphics,abline)
 importFrom(graphics,axis)
 importFrom(graphics,barplot)
+importFrom(graphics,grid)
+importFrom(graphics,legend)
+importFrom(graphics,lines)
+importFrom(graphics,mtext)
 importFrom(graphics,par)
+importFrom(graphics,plot.new)
+importFrom(graphics,plot.window)
+importFrom(graphics,points)
+importFrom(graphics,polygon)
+importFrom(graphics,rect)
+importFrom(graphics,text)
+importFrom(graphics,title)
+importFrom(stats,AIC)
+importFrom(stats,as.formula)
+importFrom(stats,coefficients)
+importFrom(stats,cophenetic)
+importFrom(stats,cor)
+importFrom(stats,cutree)
+importFrom(stats,dist)
+importFrom(stats,formula)
+importFrom(stats,hclust)
+importFrom(stats,heatmap)
+importFrom(stats,lm)
+importFrom(stats,nls)
+importFrom(stats,nls.control)
+importFrom(stats,predict)
+importFrom(stats,sd)
+importFrom(utils,head)
+importFrom(utils,modifyList)
diff --git a/NEWS.md b/NEWS.md
index ae27469..91b8ee8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,26 +1,43 @@
-NEWS
-===========
+# Version 0.0 - Release Notes
 
-### respeciate 0.2.1 (Release date: 2023-06-21)
+* [0.2.3] 
+    * released 2023-09-26 
+    * added functions: sp_rescale_species, sp_dcast, sp_dcast_species 
+    sp_pad, sp_pls_profile 
+    * added draft spq_ quick profile build  functions 
+    * added pls_ functions for sp_pls_profile outputs
+    * sp_profile update: references not included by default 
+    * sp_match: changed n to matches; added min.n (so more consistent 
+    with other functions); now averages x
+    * sp_species_cor: added heatmap and key list options 
 
-- updated sysdata (now using SPECIATE 5.2)
-- added sp\_find\_species 
-- extended sp\_find\_profile; now allows by='species_name'
-- simplified plot.respeciate
+* [0.2.2] 
+    * released 2023-08-24 
+    * now imports data.table; moving to data.table methods for speed 
+    * changed news format because build_news missing previous md text 
+    * added functions: sp_rescale_profile, sp_dcast_profile, sp_species_cor, 
+    sp_profile_dist, sp_match_profile, sp_melt_wide
+    * reduced object classes
+    
+* [0.2.1] 
+    * released 2023-06-21 
+    * updated sysdata (now using SPECIATE 5.2)
+    * added sp_find_species 
+    * extended sp_find_profile; now searches by species_name
+    * simplified plot.respeciate
 
+* [0.2.0] 
+    * released 2021-05-26 
+    * Karl Ropkins joined
+    * moved your sysdata.rda to the package data folder
+    * reset lazy.data to TRUE  
+    * added an object class
+    * added sp_find_profile (find_code but making object class)
+    * added sp_profile (spec but making object class)
+    * added crude print and plot methods for object classes
+    * updated date and version
+    * The new code is in R:speciate.0.2.r
 
-### respeciate 0.2.0 (Release date: 2021-05-26)
-
-- Karl Ropkins joined
-- moved your sysdata.rda to the package data folder
-- reset lazy.data to TRUE  
-- added an object class
-- added sp\_find\_profile (find\_code but making object class)
-- added sp\_profile (spec but making object class)
-- added crude print and plot methods for object classes
-- updated date and version
-- The new code is in R/speciate.0.2.r
-
-### respeciate 0.1.0 (Release date: 2020-12-20)
-
-- Create respeciate
+* [0.1.0] 
+    * released 2020-12-20  
+    * Created respeciate
diff --git a/R/respeciate.generics.R b/R/respeciate.generics.R
index c44a8d9..1f543be 100644
--- a/R/respeciate.generics.R
+++ b/R/respeciate.generics.R
@@ -1,75 +1,133 @@
 #' @name respeciate.generics
 #' @title respeciate.generics
-#' @importFrom graphics axis barplot par
-
-#' @description respeciate object classes and generic functions
-
-#reversed order of documentation,
-#started with the find function...
+#' @description \code{respeciate} object classes and generic functions.
 
+########################
+#might move all the text to top
+#    hard to keep style consistent when docs are in between
+#    multiple functions
 
 #' @description When supplied a \code{respeciate}
 #' object, \code{\link{print}} manages its appearance.
+#' @description When supplied a \code{respeciate}
+#' object, \code{\link{plot}} provides a basic plot
+#' output. This uses base function \code{\link{barplot}};
+#' also see note.
 #' @param x the \code{respeciate}
 #' object to be printed, plotted, etc.
-#' @param n when plotting or printing a multi-profile, the
-#' number(s) of profile(s) to report.
+#' @param n when plotting or printing a multi-profile object, the
+#' maximum number of profiles to report. (When plotting, \code{n}
+#' is ignored if \code{id} is also set.)
 #' @param ... any extra arguments, mostly ignored except by
 #' \code{plot} which passes them to \code{\link{barplot}}.
+#' @param object like \code{x} but for \code{summary}.
+#' @param id numeric, indices of profiles to use when
+#' plotting (\code{id=1:6} is equivalent to \code{n=6}).
+#' @param order logical, order the species in the
+#' profile(s) by relative abundance before plotting.
+#' @note \code{respeciate} objects revert to
+#' \code{data.frame}s when not doing anything
+#' package-specific, so you can still
+#' use as previously with \code{lattice} or
+#' \code{ggplot2}, useful if you are pulling multiple
+#' profiles and you exceed the base \code{\link{barplot}}
+#' capacity...
+
+
+#notes
+##################################
+
+#loosing the respeciate.ref and respeciate.spcs classes
+#    testing merge them into respeciate
+
+#         with different print outputs?
+#         only plot for class plot, etc???
+
 #' @rdname respeciate.generics
 #' @method print respeciate
 #' @export
 
-print.respeciate <-
-  function(x, n=6, ...){
-    #shifted from REF_Code to PROFILE_Code
-    #   when allowing multipe profiles
-    #   because REF_Code not unique
-    y <- unique(x$PROFILE_CODE)
-    report <- paste("respeciate profile(s): count ",
-                    length(y), "\n", sep="")
-    if(length(y)==0){
-      cat(paste(report,
-                "empty (or bad?) respeciate object\n",
-                sep=""))
-      return(invisible(x))
-    }
-    yy <- if(length(y)>n) {y[1:n]} else {y}
-    for(i in yy){
-      report <- paste(report, "  ", i, " (checksum: ",
-                      sum(as.numeric(as.character(x[x$PROFILE_CODE==i,]$WEIGHT_PERCENT)),
-                          na.rm = T), ")\n", sep="")
+print.respeciate <- function(x, n = NULL, ...){
+  test <- rsp_test_respeciate(x, level = 2, silent = TRUE)
+  if(test == "respeciate.profile.ref"){
+    if(is.null(n)){
+      n <- 100
     }
-    if(length(y)>n){
-      report <- paste(report, "  ... not showing last ",
-                      length(y)-n, "\n", sep="")
+    return(rsp_print_respeciate_profile(x=x, n=n, ...))
+  }
+  if(test == "respeciate.species.ref"){
+    if(is.null(n)){
+      n <- 10
     }
-    cat(report, sep="")
-    invisible(x)
+    return(rsp_print_respeciate_species(x=x, n=n, ...))
+  }
+  if(is.null(n)){
+    n <- 10
   }
+  rsp_print_respeciate(x=x, n=n, ...)
+}
+
+## rsp_print functions unexported
+##    further down
+##    VVVVVVVVVVVV
+##    VVVVVVVVVVVV
+
+
 
-#' @description When supplied a \code{respeciate}
-#' object, \code{\link{plot}} provides a basic plot
-#' output. This uses base function \code{\link{barplot}};
-#' also see note.
-#' @note \code{respeciate} objects revert to
-#' \code{data.frame}s when not doing anything
-#' package-specific, so you can still
-#' use as previously with \code{lattice} or
-#' \code{ggplot2}, useful if you are pulling multiple
-#' profiles and you exceed the base \code{\link{barplot}}
-#' capacity...
-#' @param id numeric, indices of profiles to use when
-#' plotting (nb: \code{n=6} is equivalent to code{id=1:6}).
-#' @param order logical, order the species in the
-#' profile(s) by relative abundance before plotting.
 #' @rdname respeciate.generics
 #' @method plot respeciate
 #' @export
 
+##########################
+#notes
+##########################
+#like....
+#better handling of factor axis labels
+#better handling of axes and legend font sizes
+#    (I think previous code may have handled this a little better)
+#    (but not perfectly...)
+#like horiz=total scales to be other way around?
+#    could also mean rethinking the legend position for this?
+
+############################
+#added warning/handling for
+#  duplicate species in profiles (handling merge/mean)
+#  duplicated profile names (handling make unique)
+
+#test is now set up to use data.table
+
+
+
 plot.respeciate <-
   function(x, n=NULL, id=NULL, order=TRUE, ...){
 
+    #add .value if not there
+    ## don't think .value works
+    x <- rsp_tidy_profile(x)
+
+    ##test object type
+    test <- rsp_test_respeciate(x, level=2, silent=TRUE)
+    if(test != "respeciate"){
+      if(test %in% c("respeciate.profile.ref", "respeciate.species.ref")){
+        stop("No plot method for respeciate.reference files.")
+      } else {
+        stop("suspect respeciate object!")
+      }
+      #don't stop - respeciate profile
+    }
+
+    ##test something to plot
+    if(nrow(x)==0){
+      ######################
+      #think about this
+      ######################
+      #maybe stop() instead???
+      #stop("empty respeciate object?")
+      return(invisible(NULL))
+    }
+
+    #hold extra args
+    #  passing to plot
     .xargs <- list(...)
 
     #test number of profiles
@@ -91,14 +149,40 @@ plot.respeciate <-
                     "\n", sep=""))
     }
 
-    x <- rsp_test(x)
-    if(any(x$COUNT>1)){
+    x <- rsp_test_profile(x)
+
+
+    if(any(x$.n>1)){
+      warning(paste("\n\t",
+                    " found duplicate species in profiles (merged and averaged...)",
+                    "\n", sep=""))
+    }
+    x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME)
+
+    ####################################
+    #issue profile names are not always unique
+    ####################################
+    test <- x
+    test$SPECIES_ID <- ".default"
+    test <- rsp_test_profile(test)
+    ###################
+    #rep_test
+    #can now replace this with data.table version
+    #BUT check naming conventions for .n
+    ###################
+
+    #does this need a warning?
+    if(length(unique(test$PROFILE_NAME))<nrow(test)){
       warning(paste("\n\t",
-                    " found duplicate profiles (merged and averaged...)",
+                    " found profiles with common names (making unique...)",
                     "\n", sep=""))
+      test$PROFILE_NAME <- make.unique(test$PROFILE_NAME)
+      x <- x[names(x) != "PROFILE_NAME"]
+      x <- merge(x, test[c("PROFILE_NAME", "PROFILE_CODE")], by="PROFILE_CODE")
     }
 
-    x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME)
+
+    #x$PROFILE_NAME <- make.unique(x$PROFILE_NAME)
 
     #order largest to smallest
     #############################
@@ -110,12 +194,12 @@ plot.respeciate <-
       ################################
       test <- x
       test$PROFILE_CODE <- ".default"
-      test <- rsp_test(test)
+      test <- rsp_test_profile(test)
       if("beside" %in% names(.xargs) && .xargs$beside){
         test <- x[order(x$WEIGHT_PERCENT, decreasing = TRUE),]
         xx <- unique(test$SPECIES_NAME)
       } else {
-        test <- test[order(test$TOTAL, decreasing = TRUE),]
+        test <- test[order(test$.total, decreasing = TRUE),]
         xx <- unique(test$SPECIES_NAME)
       }
     } else {
@@ -163,85 +247,268 @@ plot.respeciate <-
   }
 
 
+##################################
+#summary
+##################################
 
+#like something to get us to...
+#database.summary
+#summary.respeciate(sysdata$PROFILES)
 
-
-
-#' @description When supplied a \code{respeciate.ref}
-#' object, \code{\link{print}} manages its appearance.
-#' @param x the \code{respeciate} or \code{respeciate.ref}
-#' object to be printed, plotted, etc.
 #' @rdname respeciate.generics
-#' @method print respeciate.ref
+#' @method summary respeciate
 #' @export
-print.respeciate.ref <-
-  function(x, ...){
-    xx <- nrow(x)
-    wi <- getOption("width")
-    ####################################
-    #use of cat might need rethinking?
-    ####################################
-    cat("respeciate profile reference\n")
-    if(xx>100){
-      report <- c(x$PROFILE_CODE[1:100], "...")
-      comment <- " profiles [showing first 100]\n"
+
+
+# see below about alternative summary output
+#   including check sum?
+
+#   maybe table
+#   profile (code); (name); type; n.species (count); checksum; comments
+#   just show some but send all
+
+# replacing previous
+
+summary.respeciate <-
+  function(object, ...){
+    #v0.1 summary
+    #n <- object$PROFILE_TYPE
+    #n <- n[!duplicated(object$PROFILE_CODE)]
+    #summary(factor(n))
+
+    #v0.3 summary
+
+    xx <- as.data.table(object)
+
+    .xargs <- list(...)
+    .max.n <- if("max.n" %in% names(.xargs)){
+      .xargs$max.n
     } else {
-      report <- x$PROFILE_CODE
-      comment <- " profiles\n"
+      10
+    }
+    .silent <- if("silent" %in% names(.xargs)){
+      .xargs$silent
+    } else {
+      FALSE
     }
-    if(xx>0) cat(report, fill=wi)
-    cat("   > ", xx, comment, sep="")
-    invisible(x)
-  }
 
+    #check what we have
+    test <- c("WEIGHT_PERCENT","PROFILE_NAME","PROFILE_TYPE",
+              "SPECIES_ID")
+    test <- test[ test %in% colnames(xx)]
+    if(!"PROFILE_CODE" %in% colnames(xx)){
+      xx$PROFILE_CODE <- "{{ICK}}"
+    }
 
-#' @rdname respeciate.generics
-#' @method print respeciate.spcs
-#' @export
-print.respeciate.spcs <-
-  function(x, ...){
-    xx <- nrow(x)
-    wi <- getOption("width")
-    ####################################
-    #use of cat might need rethinking?
-    ####################################
-    cat("respeciate species\n")
-    if(xx>10){
-      report <- c(x$SPECIES_NAME[1:10], "...")
-      comment <- " species [showing first 10]\n"
-    } else {
-      report <- x$SPECIES_NAME
-      comment <- " species\n"
+    out <- xx[,
+              .(#SPECIES_NAME = SPECIES_NAME[1],
+                #SPEC_MW = SPEC_MW[1],
+                .checksum = if("WEIGHT_PERCENT" %in% names(xx)){
+                  sum(WEIGHT_PERCENT, na.rm = TRUE)
+                } else {
+                  NA
+                },
+                .checkname = if("PROFILE_NAME" %in% names(xx)){
+                  length(unique(PROFILE_NAME))}
+                else {
+                  NA
+                },
+                .name = if("PROFILE_NAME" %in% names(xx)){
+                  PROFILE_NAME[1]
+                } else {
+                  NA
+                },
+                .type = if("PROFILE_TYPE" %in% names(xx)){
+                  PROFILE_TYPE[1]
+                } else {
+                  NA
+                },
+                .nspecies = if("SPECIES_ID" %in% names(xx)){
+                  length(unique(SPECIES_ID))
+                } else {
+                  NA
+                }
+              ),
+              by=.(PROFILE_CODE)]
+
+    #out <- merge(xx, out, by="PROFILE_CODE", all.x=TRUE, all.y=FALSE,
+    #             allow.cartesian=TRUE)
+
+    out$PROFILE_CODE[out$PROFILE_CODE=="{{ICK}}"] <- NA
+    out <- as.data.frame(out)
+    if(!.silent){
+      if(nrow(out) > .max.n){
+        print(head(out[c(1,2,5,6)], n = .max.n))
+        cat("  [forestortened - showing ", .max.n, " of ", nrow(out), "]",
+            sep="")
+      } else {
+        print(out[c(1,2,5,6)])
+      }
     }
-    if(xx>0) cat(report, fill=wi)
-    cat("   > ", xx, comment, sep="")
-    invisible(x)
+    invisible(out)
+
+
+
+
   }
 
 
 
 
+
+
 #################################
 #unexported code
 #################################
 
+
+# like to do something like this for summary
+#     but code very messy
+#     AND run time for 100+ profiles is too slow...
+
+#     try with data.table...
+
+
+#rsp_summary_v3 <- function(object, ...){
+
+#  xx <- as.data.table(object)
+
+#  .xargs <- list(...)
+#  .max.n <- if("max.n" %in% names(.xargs)){
+#    .xargs$max.n
+#  } else {
+#    10
+#  }
+#  .silent <- if("silent" %in% names(.xargs)){
+#    .xargs$silent
+#  } else {
+#    FALSE
+#  }
+
+
+  #check what we have
+#  test <- c("WEIGHT_PERCENT","PROFILE_NAME","PROFILE_TYPE",
+#            "SPECIES_ID")
+#  test <- test[ test %in% colnames(xx)]
+#  if(!"PROFILE_CODE" %in% colnames(xx)){
+#    xx$PROFILE_CODE <- "{{ICK}}"
+#  }
+
+#  out <- xx[,
+#            .(#SPECIES_NAME = SPECIES_NAME[1],
+#              #SPEC_MW = SPEC_MW[1],
+#              .checksum = if("WEIGHT_PERCENT" %in% names(xx)){
+#                sum(WEIGHT_PERCENT, na.rm = TRUE)
+#              } else {
+#                NA
+#              },
+#              .checkname = if("PROFILE_NAME" %in% names(xx)){
+#                length(unique(PROFILE_NAME))}
+#              else {
+#                NA
+#              },
+#              .name = if("PROFILE_NAME" %in% names(xx)){
+#                PROFILE_NAME[1]
+#              } else {
+#                NA
+#              },
+#              .type = if("PROFILE_TYPE" %in% names(xx)){
+#                PROFILE_TYPE[1]
+#              } else {
+#                NA
+#              },
+#              .nspecies = if("SPECIES_ID" %in% names(xx)){
+#                length(unique(SPECIES_ID))
+#              } else {
+#                NA
+#              }
+#            ),
+#            by=.(PROFILE_CODE)]
+
+  #out <- merge(xx, out, by="PROFILE_CODE", all.x=TRUE, all.y=FALSE,
+  #             allow.cartesian=TRUE)
+
+#  out$PROFILE_CODE[out$PROFILE_CODE=="{{ICK}}"] <- NA
+#  out <- as.data.frame(out)
+#  if(!.silent){
+#    if(nrow(out) > .max.n){
+#      print(head(out[c(1,2,5,6)], n = .max.n))
+#      cat("  [forestortened - showing ", .max.n, " of ", nrow(out), "]",
+#          sep="")
+#    } else {
+#      print(out[c(1,2,5,6)])
+#    }
+#  }
+#  invisible(out)
+
+#}
+
+#rsp_summary_v2 <-
+#  function(object, ...){
+#    #v0.2 summary
+#    if(!"PROFILE_CODE" %in% names(object)){
+#      object$PROFILE_CODE <- "{{NA}}"
+#    }
+#    ref <- unique(object$PROFILE_CODE)
+#    if(length(ref)>10){
+#      ref <- ref[1:100]
+#    }
+#    .out <- lapply(ref, function(x){
+#      .tmp <- subset(object, PROFILE_CODE==x)
+#      .x <- if(x=="{{NA}}") {NA} else {x}
+#      .pt <- if("PROFILE_TYPE" %in% names(.tmp)){
+#        .tmp$PROFILE_TYPE[1]
+#      } else {
+#        NA
+#      }
+#      .pn <- if("PROFILE_NAME" %in% names(.tmp)){
+#        .tmp$PROFILE_NAME[1]
+#      } else {
+#        NA
+#      }
+#      .ns <- if("SPECIES_ID" %in% names(.tmp)){
+#        length(unique(.tmp$SPECIES_ID))
+#      } else {
+#        NA
+#      }
+#      .cs <- if("WEIGHT_PERCENT" %in% names(.tmp)){
+#        sum(.tmp$WEIGHT_PERCENT, na.rm=TRUE)
+#      } else {
+#        NA
+#      }
+#      data.frame(profile=.x,
+#                 type=.pt,
+#                 name=.pn,
+#                 n.species=.ns,
+#                 checksum=.cs)
+#    })
+#    .out <- do.call(rbind, .out)
+#    print(.out[c("profile", "type", "n.species", "checksum")], max=40)
+#    return(invisible(.out))
+#  }
+
+
+
+
 ###################################
 #class builds
 ###################################
 
-rsp_build_respeciate.spcs <-
-  function(x, ...){
+#rsp_build_respeciate.spcs <-
+#  function(x, ...){
     #build
-    class(x) <- c("respeciate.spcs", "data.frame")
-    x
-  }
-
-rsp_build_respeciate.ref <-
-  function(x, ...){
+    #add .value
+#    x <- rsp_tidy_profile(x)
+#    class(x) <- c("respeciate.spcs", "data.frame")
+#    x
+#  }
+
+#rsp_build_respeciate.ref <-
+#  function(x, ...){
     #build
-    class(x) <- c("respeciate.ref", "data.frame")
-    x
-  }
+#    class(x) <- c("respeciate.ref", "data.frame")
+#    x
+#  }
 
 rsp_build_respeciate <-
   function(x, ...){
@@ -250,50 +517,154 @@ rsp_build_respeciate <-
     x
   }
 
+
 ###########################
-#tidy names
+#split respeciate by profile
 ###########################
 
 #currently not exported
-#quick code to tidy species names
+#quick code assumed CODE is unique to profile
 
-#note: not fully tested
+#need to test this
 
-rsp_tidy_species_name <- function(x){
+#not sure we are using this any more
+#    i think rsp_test, then rsp_test.2 replaced
+#    and code in plot.respeciate.old ???
 
-  #attempts shorten names by remove other versions
-  #names seem to be in format a (or b || c)
-  #where (guessing) a is main name and
-  #         b and c are alternatives.
+rsp_split_profile <- function(x){
+  ref <- unique(x$PROFILE_CODE)
+  lapply(ref, function(y) x[x$PROFILE_CODE==y,])
+}
 
-  #not fully tested,
-  #   might still be more cases this dies on
 
-  #gsub("[(].*","", x) failed if name a includes brackets
-  #example:#"(2-methylpropyl)benzene (or isobutylbenzene)"
+########################
+#unexported rsp_print
+########################
 
-  #sub("[(][^(]or+$", "", x) failed if b or c includes brackets
-  #also left space at end so needed sub("^\\s+|\\s+$", "", x)
+## like to tidy this/these
+
+# respeciate profile(s)
+#    [profile_code] [check sum] [profile_name] < width limited
+#    ... showing n
+
+# added profile_name to output
+
+# could move check sum to summary and
+#      replace with species count???
+
+# doing this... previous
+###.msg <- paste("  ", i, " (checksum: ",
+##round(sum(as.numeric(as.character(x[x$PROFILE_CODE==i,]$WEIGHT_PERCENT)),
+##          na.rm = T), digits=2),
+##") ", i2, "\n", sep="")
+
+# could make other respeciate print outputs
+#      look like this?
+
+rsp_print_respeciate <-
+  function(x, n=6, ...){
+    #profile_code is (I think) only term unique to a profile
+    y <- unique(x$PROFILE_CODE)
+    report <- paste("respeciate profile(s): count ",
+                    length(y), "\n", sep="")
+    if(length(y)==0){
+      cat(paste(report,
+                "empty (or bad?) respeciate object\n",
+                sep=""))
+      return(invisible(x))
+    }
+    .tmp <- getOption("width")
+    yy <- if(length(y)>n) {y[1:n]} else {y}
+    for(i in yy){
+      if("PROFILE_NAME" %in% names(x)){
+        i2 <- x$PROFILE_NAME[x$PROFILE_CODE==i][1]
+      } else {
+        i2 <- "[unknown]"
+      }
+      if("SPECIES_ID" %in% names(x)){
+        .spe <- length(unique(x$SPECIES_ID[x$PROFILE_CODE==i]))
+      } else {
+        .spe <- "0!"
+      }
+      .msg <- paste("  ", i, " (", .spe, " species) ",
+                    i2, "\n", sep="")
+      if(nchar(.msg)>.tmp){
+        .msg <- paste(substring(.msg, 1, .tmp-3), "...\n")
+      }
+      report <- paste(report, .msg, sep="")
+    }
+    if(length(y)>n){
+      report <- paste(report, "  ... not showing last ",
+                      length(y)-n, "\n", sep="")
+    }
+    cat(report, sep="")
+    invisible(x)
+  }
+
+
+## #' @description When supplied a \code{respeciate.ref}
+# #' object, \code{\link{print}} manages its appearance.
+# #' @param x the \code{respeciate} or \code{respeciate.ref}
+# #' object to be printed, plotted, etc.
+# #' @rdname respeciate.generics
+# #' @method print respeciate.ref
+# #' @export
+rsp_print_respeciate_profile <-
+  function(x, n = 100, ...){
+    xx <- nrow(x)
+    wi <- getOption("width")
+    ####################################
+    #use of cat might need rethinking?
+    ####################################
+    cat("respeciate profile reference\n")
+    if(n>xx){
+      n <- xx
+    }
+    if(xx>n){
+      report <- c(x$PROFILE_CODE[1:n], "...")
+      comment <- paste(" profiles [showing first ", n,
+                       "]\n", sep = "")
+    } else {
+      report <- x$PROFILE_CODE
+      comment <- " profiles\n"
+    }
+    if(xx>0) cat(report, fill=wi)
+    cat("   > ", xx, comment, sep="")
+    invisible(x)
+  }
+
+
+# #' @rdname respeciate.generics
+# #' @method print respeciate.spcs
+# #' @export
+rsp_print_respeciate_species <-
+  function(x, n = 10, ...){
+    xx <- nrow(x)
+    wi <- getOption("width")
+    ####################################
+    #use of cat might need rethinking?
+    ####################################
+    cat("respeciate species reference\n")
+    if(n>xx){
+      n <- xx
+    }
+    if(xx>n){
+      report <- c(x$SPECIES_NAME[1:n], "...")
+      comment <- paste(" species [showing first ", n,
+                       "]\n", sep="")
+    } else {
+      report <- x$SPECIES_NAME
+      comment <- " species\n"
+    }
+    if(xx>0) cat(report, fill=wi)
+    cat("   > ", xx, comment, sep="")
+    invisible(x)
+  }
 
-  #sometimes it is "( or "
-  x <- gsub(" [(] or ", " (or ", x)
-  #next removes from last "(or" onwards
-  x <- gsub("[(]or .*","", x)
-  sub("^\\s+|\\s+$", "", x)
-}
 
 
-###########################
-#split respeciate by profile
-###########################
 
-#currently not exported
-#quick code assumed CODE is unique to profile
 
-rsp_split_profile <- function(x){
-  ref <- unique(x$PROFILE_CODE)
-  lapply(ref, function(y) x[x$PROFILE_CODE==y,])
-}
 
 
 
@@ -427,26 +798,6 @@ plot.respeciate.old <-
     }
   }
 
-rsp_test <- function(x){
-  .prf <- unique(x$PROFILE_CODE)
-  ans <- lapply(.prf, function(y){
-    temp <- subset(x, PROFILE_CODE==y)
-    .spc <- unique(temp$SPECIES_ID)
-    ans <- lapply(.spc, function(z){
-      temp2 <- subset(temp, SPECIES_ID==z)
-      data.frame(PROFILE_CODE = y,
-                 PROFILE_NAME = temp2$PROFILE_NAME[1],
-                 SPECIES_ID = z,
-                 SPECIES_NAME = temp2$SPECIES_NAME[1],
-                 COUNT = length(temp2$WEIGHT_PERCENT[!is.na(temp2$WEIGHT_PERCENT)]),
-                 TOTAL = sum(temp2$WEIGHT_PERCENT[!is.na(temp2$WEIGHT_PERCENT)]),
-                 SPEC_MW = temp2$SPEC_MW[1],
-                 WEIGHT_PERCENT=mean(temp2$WEIGHT_PERCENT, na.rm=TRUE))
-    })
-    do.call(rbind, ans)
-  })
-  do.call(rbind, ans)
-}
 
 
 
diff --git a/R/sp.001.R b/R/sp.001.R
deleted file mode 100644
index a57da41..0000000
--- a/R/sp.001.R
+++ /dev/null
@@ -1,178 +0,0 @@
-#' @name sp.001
-#' @title sp_ functions
-#' @aliases sp_find_profile sp_find_species sp_profile
-
-utils::globalVariables(c("SPECIES_ID"))
-
-#' @description sp functions for use with (re)speciate data in R...
-
-#reversed order of documentation,
-#started with the find function...
-
-#' @description \code{sp_find} functions search main data sets
-#' in the (re)SPECIATE archive using supplied search terms.
-#' \code{\link{sp_find_profile}} searches for profile records and
-#' \code{\link{sp_find_species}} searches for species records.
-#' @param ... for \code{sp_find} functions, character(s), any
-#' search term(s) to use when searching the local (re)SPECIATE archive for
-#' relevant records.
-#' @param by character, the section of the archive to
-#' search, by default \code{'keywords'} for \code{\link{sp_find_profile}} and
-#' \code{'species_names'} for \code{\link{sp_find_species}}.
-#' @param partial logical, if \code{TRUE} (default)
-#' \code{sp_find} functions use partial matching.
-#' @return \code{sp_find_profile} returns a object of
-#' \code{respeciate.ref} class, a \code{data.frame} of
-#' profile information.
-#' @examples \dontrun{
-#' profile <- "Ethanol"
-#' dt <- sp_find_profile(profile)
-#' dt}
-#'
-
-#' @rdname sp.001
-#' @export
-
-sp_find_profile <- function(..., by = "keywords", partial = TRUE) {
-  #extract profile info from archive
-  out <- sysdata$PROFILES
-  terms <- c(...)
-  ###################################
-  #ignoring case because missing loads...
-  #   might want to think about spaces as well???
-  #currently same in sp_find_species
-  #   should think about using common code???
-  #   also error messaging if by is not known???
-  ###################################
-  #how to handle searching a profile
-  #that contains a specific species
-  #    this is all the profiles that have an entry for species_id 529
-  #    sysdata$SPECIES$PROFILE_CODE[sysdata$SPECIES$SPECIES_ID==529]
-  #    could use sp_find_species to get species info, then
-  ###################################
-  if(tolower(by) %in% c("species_name")){
-    ############################
-    #special case
-    #search by species_name
-    #    could add species_id, cas, etc?
-    ###############################
-    species <- sysdata$SPECIES
-    ref <- out$PROFILE_CODE
-    for(ti in terms){
-      ans <- sp_find_species(ti, by=by, partial=partial)
-      terms <- species$PROFILE_CODE[species$SPECIES_ID %in% ans$SPECIES_ID]
-      ref <- ref[ref %in% terms]
-    }
-    out <- out[out$PROFILE_CODE %in% ref,]
-  } else {
-    for(ti in terms){
-      ref <- out[,tolower(names(out))==by]
-      if(nrow(out)>0){
-        out <- if(partial){
-          out <- out[grep(ti, ref, ignore.case = TRUE),]
-        } else {
-          out <- out[tolower(ti)==tolower(ref),]
-        }
-      }
-    }
-  }
-  out <- rsp_build_respeciate.ref(out)
-  return(out)
-}
-
-#' @return \code{sp_find_species} returns a object of
-#' \code{respeciate.spcs} class, a \code{data.frame} of
-#' species information.
-#' @examples \dontrun{
-#' species <- "Ethanol"
-#' sp <- sp_find_species(species)
-#' sp}
-#'
-
-#' @rdname sp.001
-#' @export
-
-sp_find_species <- function(..., by = "species_name", partial = TRUE) {
-  #extract species info from archive
-  out <- sysdata$SPECIES_PROPERTIES
-  terms <- c(...)
-  for(ti in terms){
-    ref <- out[,tolower(names(out))==by]
-    if(nrow(out)>0){
-      out <- if(partial){
-        out <- out[grep(ti, ref, ignore.case = TRUE),]
-      } else {
-        out <- out[tolower(ti)==tolower(ref),]
-      }
-    }
-  }
-  #out <- PROFILES[grep(term, PROFILES[[by]], ignore.case = TRUE), ]
-  out <- rsp_build_respeciate.spcs(out)
-  return(out)
-}
-
-
-
-
-
-
-#' @description \code{\link{sp_profile}} extracts a
-#' SPECIATE profile from the local (re)SPECIATE archive.
-#' @param code character or numeric, the SPECIATE code
-#' of the required profile (EPA SPECIATE term PROFILE_CODE).
-#' @return \code{sp_profile} returns a object of
-#' \code{respeciate} class, a \code{data.frame} containing a
-#' speciate profile.
-#' @references
-#' Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D.,
-#' Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010.
-#' The development and uses of EPA SPECIATE database.
-#' Atmospheric Pollution Research, 1(4), pp.196-206.
-#' @examples \dontrun{
-#' x <- sp_profile(c(8833, 8850))
-#' plot(x)}
-
-#NOTE
-
-#get_profile allows you to get multiple profiles
-#not sure this is staying
-
-
-
-#' @rdname sp.001
-#' @export
-
-sp_profile <- function(code) {
-  #handle numerics/characters
-  #######################
-  #could replace code with ...???
-  ######################
-  if(is.numeric(code)) code <- as.character(code)
-  if(!is.character(code)) stop("unexpected code class")
-
-  PROFILES <- sysdata$PROFILES
-  SPECIES <- sysdata$SPECIES
-  SPECIES_PROPERTIES <- sysdata$SPECIES_PROPERTIES
-  PROFILE_REFERENCE <- sysdata$PROFILE_REFERENCE
-  REFERENCES <- sysdata$REFERENCES
-
-  #handle multiple codes
-  ############################
-  #go direct with %in% ???
-  #text as sp_profile2 ???
-  ############################
-  df <- lapply(code, function(x){
-    df <- PROFILES[PROFILES$PROFILE_CODE == x, ]
-    df <- merge(df, SPECIES, by = "PROFILE_CODE")
-    df <- merge(df, SPECIES_PROPERTIES, by = "SPECIES_ID")
-    df <- merge(df, PROFILE_REFERENCE, by = "PROFILE_CODE")
-    df <- merge(df, REFERENCES, by = "REF_Code")
-    df
-  })
-  #build
-  df <- do.call(rbind, df)
-  df <- rsp_build_respeciate(df)
-  return(df)
-}
-
-
diff --git a/R/sp.R b/R/sp.R
new file mode 100644
index 0000000..d1f1c8b
--- /dev/null
+++ b/R/sp.R
@@ -0,0 +1,191 @@
+#' @name sp
+#' @title sp_ functions
+#' @aliases sp_profile
+
+
+#' @description sp function to get profiles from the R (re)SPECIATE archive
+
+#' @description \code{\link{sp_profile}} extracts a
+#' SPECIATE profile from the local (re)SPECIATE archive.
+#' @param code character, numeric or data.frame, the SPECIATE code
+#' of the required profile (EPA SPECIATE identifier PROFILE_CODE). This is
+#' typically one or concatenated character or numeric entries, but can also
+#' be a \code{respeciate} object or similar \code{data.frame} containing
+#' the \code{code}s as a named \code{PROFILE_NAME} column.
+#' @param include.refs logical, include profile reference information when
+#' getting the requested profile(s) from the archive, default \code{FALSE}.
+#' @return \code{sp_profile} returns a object of
+#' \code{respeciate} class, a \code{data.frame} containing a
+#' (re)SPECIATE profile.
+#' @note The option \code{include.refs} adds profile source reference
+#' information to the returned \code{respeciate} data set. The default option
+#' is to not include these because some profiles have several associated
+#' references and including these replicates records, once per reference.
+#' \code{respeciate} code is written to handle this but if you are developing
+#' own methods or code and include references in any profile build you may be
+#' biasing some analyses in favor of those multiple-reference profile unless
+#' you check and account such cases.
+#' @references
+#' Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D.,
+#' Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010.
+#' The development and uses of EPA SPECIATE database.
+#' Atmospheric Pollution Research, 1(4), pp.196-206.
+#' @examples \dontrun{
+#' x <- sp_profile(c(8833, 8850))
+#' plot(x)}
+
+#NOTES
+#######################
+
+#to think about
+#######################
+
+#add functions???
+
+## sp_build_profile to make a profile locally
+##     needs profile_name, profile_code
+##           species_name, species_id
+##           weight_percent (and possibly .value)
+
+## sp_import_profile to import a profile from an external source
+##     extension of above to import data from specific sources
+##           might be very code intensive..?
+
+## local function to pad data using database???
+
+#' @rdname sp
+#' @export
+##     (now importing via xxx.r)
+##     #' @import data.table
+
+# may need to set data.table specifically??
+#      data.table::as.data.table, etc??
+
+#####################
+#to think about
+#####################
+# not sure but I think something in the main build:
+#    (default; include.refs = FALSE)
+#    PROFILES>>SPECIES>>SPECIES_PROPERTIES
+#    (full build; include.refs = TRUE)
+#    PROFILES>>SPECIES>>SPECIES_PROPERTIES>>PROFILE_REFERENCE>>REFERENCES
+# is replicating profiles.
+#
+
+#v 0.2
+#   based on previous sp_profile but using data.table
+#   (0.1 version currently unexported sp_profile.old)
+
+sp_profile <- function(code, include.refs=FALSE) {
+
+  # code currently handles:
+  # respeciate.ref, numerics and characters characters
+
+  #######################
+  #could replace code with ...???
+  ######################
+
+  if(is.data.frame(code) && "PROFILE_CODE" %in% names(code)){
+    code <- unique(code$PROFILE_CODE)
+  }
+  if(is.numeric(code)) code <- as.character(code)
+  if(!is.character(code)) {
+    stop("unexpected 'code' class",
+         call.=FALSE)
+  }
+
+  PROFILES <- as.data.table(sysdata$PROFILES)
+  SPECIES <- as.data.table(sysdata$SPECIES)
+  SPECIES_PROPERTIES <- as.data.table(sysdata$SPECIES_PROPERTIES)
+  PROFILE_REFERENCE <- as.data.table(sysdata$PROFILE_REFERENCE)
+  REFERENCES <- as.data.table(sysdata$REFERENCES)
+
+  ##########################
+  #testing tolower below
+  #   as a fix for code arg case sensitivity
+  ##########################
+  #  could test replacing some of this with sp_pad???
+  #      IF sp_pad stays
+  df <- PROFILES[tolower(PROFILES$PROFILE_CODE) %in% tolower(code),]
+  df <- merge(df, SPECIES, by = "PROFILE_CODE", all.y=FALSE, all.x=TRUE,
+              allow.cartesian=TRUE)
+  df <- merge(df, SPECIES_PROPERTIES, by = "SPECIES_ID", all.y=FALSE,
+              all.x=TRUE, allow.cartesian=TRUE)
+  if(include.refs){
+    df <- merge(df, PROFILE_REFERENCE, by = "PROFILE_CODE", all.y=FALSE,
+                all.x=TRUE, allow.cartesian=TRUE)
+    df <- merge(df, REFERENCES, by = "REF_Code", all.y=FALSE, all.x=TRUE,
+                allow.cartesian=TRUE)
+  }
+  df <- df[order(df$PROFILE_CODE, decreasing = FALSE),]
+
+  #build
+  #note: currently adding .value in rsp_build_respeciate
+  #      could do it here?
+  #          leaving there for now... because we would
+  #          still have to do it there for self-build or
+  #          imported profiles...
+  df <- rsp_build_respeciate(as.data.frame(df))
+  return(df)
+}
+
+
+
+
+
+
+
+
+
+
+
+
+#############################
+#unexported & previous code
+#############################
+
+#sp_profile v 0.1
+#now unexported
+
+rsp_profile.old <- function(code) {
+  #handle numerics/characters
+  #######################
+  #could replace code with ...???
+  ######################
+  if(class(code)[1] == "respeciate" && "PROFILE_CODE" %in% names(code)){
+    code <- unique(code$PROFILE_CODE)
+  }
+  if(is.numeric(code)) code <- as.character(code)
+  if(!is.character(code)) stop("unexpected code class")
+
+  PROFILES <- sysdata$PROFILES
+  SPECIES <- sysdata$SPECIES
+  SPECIES_PROPERTIES <- sysdata$SPECIES_PROPERTIES
+  PROFILE_REFERENCE <- sysdata$PROFILE_REFERENCE
+  REFERENCES <- sysdata$REFERENCES
+
+  #handle multiple codes
+  ############################
+  #replace previous lapply with a direct %in%
+  ##  df <- lapply(code, function(x){
+  ##    df <- PROFILES[PROFILES$PROFILE_CODE == x, ]
+  ##    ...
+  ##  })
+  ##  df <- do.call(rbind, df)
+  #testing as sp_profile.2
+  #faster with data.table
+  ############################
+    df <- PROFILES[PROFILES$PROFILE_CODE %in% code,]
+    df <- merge(df, SPECIES, by = "PROFILE_CODE", all.y=FALSE, all.x=TRUE)
+    df <- merge(df, SPECIES_PROPERTIES, by = "SPECIES_ID", all.y=FALSE, all.x=TRUE)
+    df <- merge(df, PROFILE_REFERENCE, by = "PROFILE_CODE", all.y=FALSE, all.x=TRUE)
+    df <- merge(df, REFERENCES, by = "REF_Code", all.y=FALSE, all.x=TRUE)
+    df <- df[order(df$PROFILE_CODE, decreasing = FALSE),]
+##  })
+  #build
+  df <- rsp_build_respeciate(df)
+  return(df)
+}
+
+
+
diff --git a/R/sp.average.R b/R/sp.average.R
new file mode 100644
index 0000000..b8afd74
--- /dev/null
+++ b/R/sp.average.R
@@ -0,0 +1,142 @@
+#' @name sp.average
+#' @title speciate data averaging functions
+#' @aliases sp_average_profile
+
+
+#' @description Functions to build composite (re)SPECIATE profiles
+
+
+#' @description \code{sp_average_profile} generates an average composite
+#' of a supplied multi-profile \code{respeciate} object.
+#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+#' profiles.
+#' @param code required character, the unique profile code to assign to the
+#' average profile.
+#' @param name character, the profile name to assign to the average
+#' profile. If not supplied, this defaults to a collapsed list of the codes
+#' of all the profiles averaged.
+#' @param method numeric, the averaging method to apply: Currently only 1 (default)
+#' \code{mean(x)}.
+#' @param ... additional arguments, currently ignored
+#' @return \code{sp_average_profile} returns a single profile average
+#' version of the supplied \code{respeciate} profile.
+#' @note In development function; arguments and outputs likely to be subject to
+#' change.
+#'
+#' This is one of the very few \code{respeciate} functions that modifies the
+#' \code{WEIGHT_PERCENT} column of the \code{respectiate} \code{data.frame}.
+
+
+#NOTE
+
+#' @rdname sp.average
+#' @export
+
+## #' @import data.table (in xxx.r)
+# may need to set data.table specifically??
+#      data.table::as.data.table, etc??
+
+######################
+#average data
+######################
+
+## in development
+
+## to do
+
+##     padding output - see in function notes
+
+##     plot and output options
+
+## to think about
+
+##     currently averaging using mean
+##          do we need to rescale before averaging because not all
+##          WEIGHT_PERCENT add up to 100 ??
+
+##     mean.respeciate
+#           could be a no plot, method = mean version of this function ??
+
+
+##########################
+#issue
+############################
+
+###########################
+#test
+###########################
+
+#aa <- sp_profile(sp_find_profile("ae8", by="profile_type"))
+#sp_average_profile(aa)
+
+
+sp_average_profile <- function(x, code = NULL, name = NULL, method = 1,
+                               ...){
+
+  #################################
+  #check x is a respeciate object??
+
+  #check it has .value
+  x <- rsp_tidy_profile(x)
+
+  #save class to return as is..
+  #    thinking about this
+  tmp <- class(x)
+  xx <- as.data.table(x)
+
+  #save profiles
+  test <- unique(x$PROFILE_CODE)
+
+  #extra.args - not sure if we are using these
+  .xargs <- list(...)
+
+  #get profile terms if supplied
+  if(is.null(code)){
+    stop("need a new profile code")
+  }
+  xx$PROFILE_CODE <- code
+
+  xx$PROFILE_NAME <- if(is.null(name)){
+    if(length(test)>10){
+      "average of multiple cases"
+    } else {
+      paste("average of:", paste(test, collapse = ","), sep="")
+    }
+  } else {
+    name
+  }
+
+  out <- xx[,
+            .(PROFILE_NAME = PROFILE_NAME[1],
+              SPECIES_NAME = SPECIES_NAME[1],
+              SPEC_MW = SPEC_MW[1],
+              .total = sum(.value, na.rm = TRUE),
+              .value = mean(.value, na.rm = TRUE),
+              .n = length(.value[!is.na(.value)]),
+              .sd = sd(.value, na.rm = TRUE)
+            ),
+            by=.(PROFILE_CODE, SPECIES_ID)]
+  #I said we would NOT do this...
+  out$WEIGHT_PERCENT <- out$.value
+
+  #########################
+  #pad missing info
+  #    check method in sp_profile
+  #    species info
+  #    ignore profile and ref info
+  #         because the user is builder...
+  #########################
+
+  #output
+
+  ################
+  #plot, report, etc
+  ###################
+
+  out <- as.data.frame(out)
+  class(out) <- tmp
+  out
+
+}
+
+
diff --git a/R/sp.cluster.R b/R/sp.cluster.R
new file mode 100644
index 0000000..722ebbe
--- /dev/null
+++ b/R/sp.cluster.R
@@ -0,0 +1,351 @@
+#' @name sp.cluster
+#' @title sp_profile clustering
+#' @aliases sp_profile_distance
+
+#' @description sp_profile functions for studying similarities (or
+#' dissimilarities) within multi-profile (re)SPECIATE data sets
+
+#' @description \code{\link{sp_profile_distance}} calculates the statistical distance
+#' between re(SPECIATE) profiles, and clusters profiles according to nearness.
+#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+#' profiles.
+#' @param output Character vector, required function output: \code{'report'} the
+#' calculated distance matrix; \code{'plot'} a heat map of that distance
+#' matrix.
+#' @note Please note: function in development; structure and arguments may be
+#' subject to change.
+#' @return Depending on the \code{output} option, \code{sp_profile_distance} returns
+#' one or more of the following: the correlation matrix, a heat map of the
+#' correlation matrix.
+
+
+########################
+#to think about
+########################
+
+#think about other functions
+#   variations on current dist
+#   kmeans
+#   others
+
+# https://www.statology.org/k-means-clustering-in-r/
+
+#NOTE
+
+#' @rdname sp.cluster
+#' @export
+
+#  using data.table for dcast
+
+# start build the code for the matching method
+# and the dissim function, etc...
+
+
+#needs a lot of work
+#    needs thinking through
+#    needs options/formals
+
+#output like in sp_species_cor
+
+#also check through and consider other options in sp_profile_cor
+
+#currently tracking
+
+#think about how we handle too-big matrices, e.g.
+#   aa <- sp_profile(sp_find_profile("ae6", by="profile_type"))
+#   sp_profile_distance(aa)
+
+
+
+#test
+######################
+
+#aa <- sp_profile(sp_find_profile("ae8", by="profile_type"))
+#sp_profile_distance(aa)
+
+
+sp_profile_distance <- function(x, output = c("plot", "report")){
+
+  #add .value if missing
+  x <- rsp_tidy_profile(x)
+
+  # make by profile (rows) by species (columns) data.frame
+  # move profile_code to row.names for heatmap
+  .x <- sp_dcast(x, widen = "species")
+  .tmp <- .x[-1:-2]
+  row.names(.tmp) <- .x[,1]
+
+  #dist calculation
+  #reset NAs to 0
+  #   that might not be best option...
+  .dst <- dist(scale(.tmp, center = TRUE, scale = TRUE))
+  .dst[is.na(.dst)] <- 0
+
+  #currently not using these two...
+  #   should drop or re-include
+  #      (think this is a shortcut to similar when used with image)
+  .clst <- hclust(.dst, method = "average")
+  ord <- order(cutree(.clst, k = 3))
+
+  #print(ord)
+  #image(as.matrix(.dst)[ord, ord])
+  .cop <- cophenetic(.clst)
+  #names(.dst) <- .x[,1]
+  #image(as.matrix(.cop)[ord, ord])
+
+  #######################
+  #output
+  #######################
+
+  #think about
+  #    could handle this like plot or
+  #    using a heatmap and legend argument???
+  #
+  if("plot" %in% output){
+    heatmap(as.matrix(.cop), Rowv = FALSE, Colv=FALSE, scale="none")
+  }
+  if("report" %in% output){
+    invisible(.cop)
+  }
+}
+
+#check through below on similarity matrices notes
+
+
+
+
+
+#####################
+#unexported
+#####################
+
+
+##from https://stackoverflow.com/questions/5639794/in-r-how-can-i-plot-a-similarity-matrix-like-a-block-graph-after-clustering-d
+
+## MASS is for mvrnorm
+#require(MASS)
+## seed sets the output
+#set.seed(1)
+#dat <- data.frame(mvrnorm(100, mu = c(2,6,3),
+#                          Sigma = matrix(c(10,   2,   4,
+#                                           2,   3, 0.5,
+#                                           4, 0.5,   2), ncol = 3)))
+
+##Compute the dissimilarity matrix of the standardised data using Eucildean distances
+
+#dij <- dist(scale(dat, center = TRUE, scale = TRUE))
+
+##and then calculate a hierarchical clustering of these data using the group average method
+
+#clust <- hclust(dij, method = "average")
+
+##Next we compute the ordering of the samples on basis of forming 3 ('k') groups from the dendrogram, but we could have chosen something else here.
+
+#ord <- order(cutree(clust, k = 3))
+
+##Next compute the dissimilarities between samples based on dendrogram, the cophenetic distances:
+
+#  coph <- cophenetic(clust)
+
+##Here are 3 image plots of:
+
+##  The original dissimilarity matrix, sorted on basis of cluster analysis groupings,
+##The cophenetic distances, again sorted as above
+##The difference between the original dissimilarities and the cophenetic distances
+##A Shepard-like plot comparing the original and cophenetic distances; the better the clustering at capturing the original distances the closer to the 1:1 line the points will lie
+##Here is the code that produces the above plots
+
+#layout(matrix(1:4, ncol = 2))
+#image(as.matrix(dij)[ord, ord], main = "Original distances")
+#image(as.matrix(coph)[ord, ord], main = "Cophenetic distances")
+#image((as.matrix(coph) - as.matrix(dij))[ord, ord],
+#      main = "Cophenetic - Original")
+#plot(coph ~ dij, ylab = "Cophenetic distances", xlab = "Original distances",
+#     main = "Shepard Plot")
+#abline(0,1, col = "red")
+#box()
+#layout(1)
+
+#and testing
+
+#might be something in here!!
+
+##looks like I was using ae8 and ae6
+## so 6 and 219 profiles
+## full data base is 6845
+## so be useful
+#a <- sp_profile.2(find_sp_profile("pm-ae8", by="profile_type"))
+#a <- sp_profile.2(find_sp_profile("pm-ae6", by="profile_type"))
+
+#dat <- data.frame(MASS::mvrnorm(100, mu = c(2,6,3),
+#                                Sigma = matrix(c(10,   2,   4,
+#                                                 2,   3, 0.5,
+#                                                 4, 0.5,   2), ncol = 3)))
+#dat
+#dij <- dist(scale(dat, center = TRUE, scale = TRUE))
+#image(dij, main = "Original distances")
+#image(as.matrix(dij), main = "Original distances")
+#dij
+#image(as.matrix(dij[order(dij$V1),]), main = "Original distances")
+#dij
+#a <- sp_profile.2(find_sp_profile("pm-ae8", by="profile_type"))
+#a
+#b <- test_wide(a)
+#head(b)
+#head(b[-1:-2])
+#bb <- b[-1:-2])
+#bb <- b[-1:-2]
+#dij <- dist(scale(bb, center = TRUE, scale = TRUE))
+#image(as.matrix(dij), main = "Original distances")
+#heatmap(as.matrix(dij), main = "Original distances")
+#b$PROFILE_NAME
+#heatmap(as.matrix(dij))
+#?heatmap
+#a <- sp_profile.2(find_sp_profile("pm-ae6", by="profile_type"))
+#b <- test_wide(a)
+#bb <- b[-1:-2]
+#heatmap(as.matrix(dij))
+#dij <- dist(scale(bb, center = TRUE, scale = TRUE))
+#heatmap(as.matrix(dij))
+#clust <- hclust(dij, method = "average")
+#?hclust
+#class(dij)
+#dij
+#summary(dij)
+#dij[is.na(dij=0)]
+#dij[is.na(dij)]
+#dij[is.na(dij)]<- 0
+#heatmap(as.matrix(dij))
+#clust <- hclust(dij, method = "average")
+#ord <- order(cutree(clust, k = 3))
+#heatmap(as.matrix(dij)[ord, ord])
+#coph <- cophenetic(clust)
+#image(as.matrix(coph)[ord, ord], main = "Cophenetic distances")
+#image(as.matrix(dij)[ord, ord])
+#ord <- order(cutree(clust, k = 7))
+#coph <- cophenetic(clust)
+#image(as.matrix(dij)[ord, ord])
+#image(as.matrix(coph)[ord, ord], main = "Cophenetic distances")
+#image((as.matrix(coph) - as.matrix(dij))[ord, ord],
+#      main = "Cophenetic - Original")
+#plot(coph ~ dij, ylab = "Cophenetic distances", xlab = "Original distances",
+#     main = "Shepard Plot")
+#abline(0,1, col = "red")
+#dij <- dist(scale(bb, center = TRUE, scale = TRUE))
+#summary(dij)
+#dij[is.na(dij)]
+#as.vector(dij)
+#dij[is.na(dij)] <- (max(dij, na.rm=TRUE)*5)
+#ord <- order(cutree(clust, k = 7))
+#clust <- hclust(dij, method = "average")
+#image(as.matrix(coph)[ord, ord], main = "Cophenetic distances")
+#image(as.matrix(dij)[ord, ord], main = "Cophenetic distances")
+#image(as.matrix(dij), main = "Cophenetic distances")
+#dij <- dist(scale(bb, center = TRUE, scale = TRUE))
+#dij[is.na(dij)] <- (max(dij, na.rm=TRUE)*1)
+#clust <- hclust(dij, method = "average")
+#ord <- order(cutree(clust, k = 7))
+#image(as.matrix(dij)[ord, ord], main = "Cophenetic distances")
+#image(as.matrix(dij), main = "Cophenetic distances")
+#heatmap(dij)
+#heatmap(as.matrix(dij))
+#?heatmap
+#max(dij)
+#which(max==max(dij))
+#which(max=max(dij))
+#where(max=max(dij))
+#?which
+#where(max~max(dij))
+#which(max~max(dij))
+#which(dij==max(dij))
+#summary(dij)
+#?dist
+#dij[order(dij)]
+#dij[order(dij)][1:10]
+#a <- sp_profile.2(find_sp_profile("pm-ae8", by="profile_type"))
+#b <- test_wide(a)
+#bb <- b[-1:-2]
+#dij <- dist(scale(bb, center = TRUE, scale = TRUE))
+#plot(dij)
+#plot(as.matrix(dij))
+#heatmap(as.matrix(dij))
+#dij
+#cor(bb)
+#?cor
+#cor(bb, use="pairwise.complete.obs")
+#heapmap(cor(bb, use="pairwise.complete.obs"))
+#heatmap(cor(bb, use="pairwise.complete.obs"))
+#a <- sp_profile.2(find_sp_profile("pm-ae8", by="profile_type"))
+#b <- test_wide(a)
+#bb <- b[-1:-2]
+#heatmap(cor(bb, use="pairwise.complete.obs"))
+#cor(bb, use="pairwise.complete.obs")
+#plot(cor(bb, use="pairwise.complete.obs"))
+#is(cor(bb, use="pairwise.complete.obs"))
+#o <- cor(bb, use="pairwise.complete.obs")
+#o[is.na(o)] <- -2
+#heatmap(o)
+#o[o== -2] <- -4
+#heatmap(o)
+#o[o== -4] <- -1
+#heatmap(o)
+#?heatmap
+#heatmap(o, Rowv = FALSE)
+#heatmap(o, Rowv = FALSE, Colv=FALSE)
+#o
+#View(o)
+#o[o== -1] <- 0
+#heatmap(o, Rowv = FALSE, Colv=FALSE)
+#heatmap(o, Rowv = FALSE, Colv=FALSE, scale="none")
+#heatmap(o, scale="none")
+#heatmap(dij, scale="none")
+#heatmap(as.matrix(dij), scale="none")
+#a
+#b
+#apply(b, 1, function(x) {length(x[!na(x)])})
+#apply(b, 1, function(x) {length(x[!is.na(x)])})
+#apply(b, 2, function(x) {length(x[!is.na(x)])})
+#b$Nickel
+#?cor
+#longley
+#cor(longley)
+#heatplot(cor(longley), scale="none")
+#heatmap(cor(longley), scale="none")
+#symnum(cor(longley))
+#b
+#apply(b, 2, function(x) {length(x[!is.na(x)])})
+#oo <- b[apply(b, 2, function(x) {length(x[!is.na(x)])})>3]
+#oo
+#cor(oo)
+#o <- cor(oo, use="pairwise.complete.obs")
+#o <- cor(oo[(-1:-2)], use="pairwise.complete.obs")
+#heatmap(o, scale="none")
+#o
+#heatmap(o^2, scale="none")
+#heatmap(o, scale="none")
+#find_sp_species("Ethanol")
+#find_sp_profile("Ethanol", by="species_name")
+#find_sp_profile("Ethanol", by="species_name", partial=FALSE)
+#find_sp_profile("Ethanne", by="species_name", partial=FALSE)
+#find_sp_profile("Ethane", by="species_name", partial=FALSE)
+#pr <- sp_profile(find_sp_profile("Ethane", by="species_name", partial=FALSE))
+#pr
+#summary(factor(pr$PROFILE_NAME))
+#summary(factor(pr$PROFILE_TYPE))
+#summary(factor(pr$SPECIES_NAME))
+#pr$SPECIES_NAME=="Ethane"
+#pr[pr$SPECIES_NAME=="Ethane",]
+#pr[pr$SPECIES_NAME=="Ethane",]
+#plot(pr[pr$SPECIES_NAME=="Ethane",]$WEIGHT_PERCENT)
+#summary(factor(pr$PROFILE_NAME))
+#.pr <- pr[pr$SPECIES_NAME=="Ethane",]
+#summary(factor(.pr$PROFILE_NAME))
+#summary(factor(.pr$PROFILE_CODE))
+#.pr <- .pr[.pr$PROFILE_CODE=="0296",]
+#.pr
+#as.data.frame(.pr)
+#View(.pr)
+#sysdata$REFERENCES
+#test2(.pr)
+#test_wide(.pr)
+#pr
diff --git a/R/sp.cor.R b/R/sp.cor.R
new file mode 100644
index 0000000..8c14e9c
--- /dev/null
+++ b/R/sp.cor.R
@@ -0,0 +1,238 @@
+#' @name sp.cor
+#' @title (re)SPECIATE Species Correlations
+#' @aliases sp_species_cor
+
+#' @description sp_species functions for studying relationships between
+#' species in multi-profile (re)SPECIATE data sets.
+
+#' @description \code{\link{sp_species_cor}} generates a by-species correlation
+#' matrix of the supplied (re)SPECIATE data sets.
+#' @param x \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+#' profiles.
+#' @param min.n \code{numeric} (default 3), the minimum number of species measurements
+#' needed in a profile for the function to use it in correlation calculations.
+#' Here, it should be noted that this does not guarantee the three matched
+#' pairs of measurements needed to calculate a correlation coefficient because
+#' not all profiles contain all species, so there may still be insufficient
+#' overlap on a case-by-case basis.
+#' @param cols a series of \code{numeric}, \code{character} or other class values
+#' that can be translated into a color gradient, used to color valid cases when
+#' generating plots and color keys, default \code{c("#80FFFF", "#FFFFFF", "#FF80FF")}
+#' equivalent to \code{\link{cm.colors}} output.
+#' @param na.col \code{numeric}, \code{character} or other class that can be
+#' translated into a single color, used to color \code{NA}s when generating
+#' plots and color keys, default grey \code{"#CFCFCF"}.
+#' @param heatmap.args \code{logical} or \code{list}, heat map settings. Options
+#' include \code{TRUE} (default) to generate the heat map without modification;
+#' \code{FALSE} to not plot it; or a list of heat map options to alter the plot
+#' default appearance. The plot, a standard heat map with the dendrograms
+#' removed, is generated using \code{\link[stats]{heatmap}}, so see associated
+#' documentation for valid options.
+#' @param key.args \code{logical} or \code{list}, color key settings if plotting
+#' the correlation matrix heat map. Options include \code{TRUE} (default) to
+#' generate the key without modification; \code{FALSE} to not include the key;
+#' or a list of options to alter the key appearance.
+#' @param report \code{logical} or \code{character}, the required function
+#' output. Options include: \code{'silent'} (default), to return the
+#' correlation matrix invisibly; \code{TRUE} to return the matrix
+#' (visibly); and, \code{FALSE} to not return it.
+#' @return By default \code{sp_species_cor} invisibly returns the calculated
+#' correlation matrix a plots it as a heat map, but arguments including
+#' \code{heatmap} and \code{report} can be used to modify function outputs.
+
+
+#NOTE
+
+# provisional list options done for heatmap.args and key.args!!!
+#      but will need tidying
+#      PLUS when done maybe for make local function so same
+#          can be used in other similar functions
+
+#' @rdname sp.cor
+#' @export
+
+#  using data.table for dcast
+
+######################
+#cor
+#generate correlation matrix
+######################
+
+########################
+#in development
+#######################
+
+#to think about
+#########################
+
+#speed up the correlation calculation
+#   currently painful using for loop
+#      can't use stats::cor on whole data set and include min.n option
+#      but there should be a way to make this a lot faster and retain that...
+
+#formals for cor calculation control
+#   include cor:use, etc???
+
+
+#plot handling could be tidier
+#   at moment plot handling is in an if/else
+#       with/without NAs handled differently
+#       but think this could be passed down into key and plot
+#            holding for now until key is in other plots
+#            and have an idea how it'll work
+#                (see also key)
+
+#col key
+#   currently using local function rsp_col_key
+#      could export or more to another package but would added to depends
+
+#plot and col key fine control
+#   suspecting will need better fine control of both
+#   re plot
+#      font size, heatplot options
+#   re col key
+#      font size, scale position, style/type, range
+#         currently holding (see plot handling and col key)
+
+#starting thinking about above
+#   see output control below
+
+#output control
+#    currently uses lots of settings:
+#       heatmap TRUE/FALSE to plot heatmap of correlation matrix
+#           or list of heatmap plot settings
+#       key TRUE/FALSE to add a col key to the heatmap
+#           or list of col key plot settings
+#       report "silent" to return the correlation matrix invisibly,
+#           or TRUE/FALSE to return it visibly or not at all.
+#    The list options are not yet done...
+
+
+#aa <- sp_profile(sp_find_profile("ae8", by="profile_type"))
+#sp_species_cor(aa)
+
+sp_species_cor <- function(x, min.n = 3,
+                           cols = c("#80FFFF", "#FFFFFF", "#FF80FF"),
+                           na.col = "#CFCFCF", heatmap.args = TRUE,
+                           key.args = TRUE, report = "silent"){
+  #if ref missing
+  .x <- sp_dcast(x, widen="species")
+
+  #no point doing any have less than min.n values?
+  .test <- apply(.x, 2, function(x) length(.x[!is.na(x)]))
+  .x <- .x[.test > min.n]
+
+  #any point doing any with only 1 unique value??
+
+  #############################
+  #calculate correlations
+  #############################
+
+  #Could not stop bad cors using...
+  #.cor <- cor(.x[-1:-2], use="pairwise.complete.obs")
+
+  #sd = 0 warnings?
+  #   maybe cases where 1 unique case, e.g. x=c(1,1,1,1,1), y=c(2,2,2,2)
+  #   or where overlap is low <2 x=c(1,1,1,NA,NA), y=c(NA,NA,1,NA,NA)
+
+  #I'm calling this the pokemon cludge...
+  #got to catch 'em all...
+  f <- function(x, y) {
+    test <- !is.na(x) & !is.na(y)
+    x <- x[test]
+    y <- y[test]
+    if(length(x) >= min.n){
+      if(length(unique(x))==1 || length(unique(y))==1){
+        NA
+      } else {
+        cor(x,y, use="pairwise.complete.obs")
+      }
+    } else { NA }
+  }
+
+  .X <- .x[-1:-2]
+  .cor <- as.data.frame(matrix(NA, nrow=ncol(.X), ncol=ncol(.X)))
+  for(i in 1:ncol(.cor)) {
+    for(j in 1:ncol(.cor)) {
+      .cor[i, j] <- f(.X[, i], .X[, j]) # apply f() to each combination
+    }
+  }
+  row.names(.cor) <- colnames(.cor) <- colnames(.X)
+
+  #cheats to test plot output
+  #base r graphics is getting painful
+
+  #.cor[.cor>0.5 & .cor<1] <- 0.5
+  #.cor[is.na(.cor)] <- -0.5
+  #.cor <- .cor/2
+
+  #NB: plot is heatmap without the dendrograms
+  #stackover suggests it is hard going modifying
+  #https://stackoverflow.com/questions/29893630/r-draw-heatmap-with-clusters-but-hide-dendrogram
+
+  if((is.logical(heatmap.args) && heatmap.args) | (is.list(heatmap.args))){
+    .cols.max <- 300
+    if(max(.cor, na.rm=TRUE) < 1){
+      .z <- (1 - max(.cor, na.rm=TRUE))*100
+      .cols.max <- .cols.max - .z
+    }
+    .tmp <- .cor
+    .tmp[is.na(.tmp)] <- -2
+    cols <- c(rep(na.col, 100), colorRampPalette(cols)(200))
+    if(any(is.na(.cor))){
+      #plot when
+      #.cor includes nas
+      .hm.cols <- cols[1:.cols.max]
+      .k.na <- na.col
+
+    } else {
+      #plot when
+      #.cor DOES NOT includes nas
+      .cols.min <- 101
+      if(min(.cor, na.rm=TRUE) < 1){
+        .z <- (1 + min(.cor, na.rm=TRUE))*100
+        .cols.min <- .cols.min + .z
+        #print(.cols.min)
+      }
+      .hm.cols <- cols[.cols.min:.cols.max]
+      .k.na <- NA
+    }
+    .hm <- list(x = as.matrix(.tmp), Rowv = NA, Colv = NA,
+                col = .hm.cols, scale = "none")
+    if(is.list(heatmap.args)){
+      .hm[names(heatmap.args)] <- heatmap.args
+    }
+    do.call(heatmap, .hm)
+    if((is.logical(key.args) && key.args) | (is.list(key.args))){
+      .k <- list(key = c(-1,1), cols=cols[101:300],
+                 #type=1,
+                 ticks= -1:1, y=0.75, na.col = .k.na)
+      if(is.list(key.args)){
+        .k[names(key.args)] <- key.args
+      }
+      do.call(rsp_col_key, .k)
+    }
+  }
+
+  #report handling
+
+  #output
+  ####################
+  #currently aiming for
+  #   report (invisible), plot or both
+  #   report class data.frame
+  #   output plot only does not mean anything...
+
+  if((is.logical(report) && report) | (is.character(report))){
+    if(is.character(report) && report=="silent"){
+      return(invisible(.cor))
+    } else {
+      return(.cor)
+    }
+  } else {
+    return(invisible(NULL))
+  }
+
+}
+
+
diff --git a/R/sp.info.R b/R/sp.info.R
new file mode 100644
index 0000000..62f4b79
--- /dev/null
+++ b/R/sp.info.R
@@ -0,0 +1,174 @@
+#' @name sp.info
+#' @title re(SPECIATE) information
+#' @aliases sp_info sp_profile_info sp_species_info sp_find_profile
+#' sp_find_species
+
+###########################
+#keep think about the names
+###########################
+#  sp_profile_info used to be sp_find_profile
+#  sp_species_info used to be sp_find_species
+#  both sp_find_ functions are currently sp_info_ wrappers
+#  should remove at some point???
+
+#########################
+#to think about
+#########################
+
+#
+
+
+#' @description Functions that provide (re)SPECIATE
+#' source information.
+#' \code{sp_info} generates a brief version report for the currently installed
+#' (re)SPECIATE data sets.
+#' \code{sp_profile_info} searches the currently installed (re)SPECIATE
+#' data sets for profile records.
+#' \code{sp_species_info} searches the currently installed (re)SPECIATE
+#' data sets for species records.
+
+#' @param ... character(s), any search term(s) to use when searching
+#' the local (re)SPECIATE archive for relevant records using
+#' \code{sp_profile_info} or \code{sp_species_info}.
+#' @param by character, the section of the archive to
+#' search, by default \code{'keywords'} for \code{sp_profile_info} and
+#' \code{'species_names'} for \code{sp_species_info}.
+#' @param partial logical, if \code{TRUE} (default)
+#' \code{sp_profile_info} or \code{sp_profile_info} use partial matching.
+
+#' @return \code{sp_info} provides a brief version information report on the
+#' currently installed (re)SPECIATE archive.
+#' @return \code{sp_profile_info} returns a \code{data.frame} of
+#' profile information, as a \code{respeciate} object.
+#' \code{sp_species_info} returns a \code{data.frame} of
+#' species information as a \code{respeciate} object.
+
+#' @examples \dontrun{
+#' profile <- "Ethanol"
+#' pr <- sp_find_profile(profile)
+#' pr
+#'
+#' species <- "Ethanol"
+#' sp <- sp_find_species(species)
+#' sp}
+#'
+
+#might want to replace this with example that
+#   finds profile containing ethanol?
+
+
+#######################
+#sp_info
+#######################
+
+# tidy output???
+#    little messy???
+
+#like to include summary(factor(sysdata$PROFILES$PROFILE_TYPE))
+#  or something like that
+
+# this is not currently catchable!!!!
+# a <- sp_info() #a = NULL
+
+#' @rdname sp.info
+#' @export
+
+sp_info <- function() {
+  #extract profile info from archive
+  .ver <- "source: SPECIATE 5.2\n\t[in (re)SPECIATE since 0.2.0]"
+  .pro <- length(unique(sysdata$PROFILES$PROFILE_CODE))
+  .spc <- length(unique(sysdata$SPECIES_PROPERTIES$SPECIES_ID))
+  cat(.ver, "\n\tProfiles: ", .pro, "\n\tSpecies: ", .spc, sep = "")
+
+}
+
+
+#' @rdname sp.info
+#' @export
+
+sp_profile_info <- function(..., by = "keywords", partial = TRUE) {
+  #extract profile info from archive
+  out <- sysdata$PROFILES
+  terms <- c(...)
+  ###################################
+  #ignoring case because missing loads...
+  #   might want to think about spaces as well???
+  #currently same in sp_find_species
+  #   should think about using common code???
+  #   also error messaging if by is not known???
+  ###################################
+  #how to handle searching a profile
+  #that contains a specific species
+  #    this is all the profiles that have an entry for species_id 529
+  #    sysdata$SPECIES$PROFILE_CODE[sysdata$SPECIES$SPECIES_ID==529]
+  #    could use sp_find_species to get species info, then
+  ###################################
+  if(tolower(by) %in% c("species_name")){
+    ############################
+    #special case
+    #search by species_name
+    #    could add species_id, cas, etc?
+    ###############################
+    species <- sysdata$SPECIES
+    ref <- out$PROFILE_CODE
+    for(ti in terms){
+      ans <- sp_find_species(ti, by=by, partial=partial)
+      terms <- species$PROFILE_CODE[species$SPECIES_ID %in% ans$SPECIES_ID]
+      ref <- ref[ref %in% terms]
+    }
+    out <- out[out$PROFILE_CODE %in% ref,]
+  } else {
+    for(ti in terms){
+      ref <- out[,tolower(names(out))==by]
+      if(nrow(out)>0){
+        out <- if(partial){
+          out <- out[grep(ti, ref, ignore.case = TRUE),]
+        } else {
+          out <- out[tolower(ti)==tolower(ref),]
+        }
+      }
+    }
+  }
+  out <- rsp_build_respeciate(out)
+  return(out)
+}
+
+#' @rdname sp.info
+#' @export
+
+#wrapper for above
+
+sp_find_profile <- function(...){
+  sp_profile_info(...)
+}
+
+#' @rdname sp.info
+#' @export
+
+sp_species_info <- function(..., by = "species_name", partial = TRUE) {
+  #extract species info from archive
+  out <- sysdata$SPECIES_PROPERTIES
+  terms <- c(...)
+  for(ti in terms){
+    ref <- out[,tolower(names(out))==by]
+    if(nrow(out)>0){
+      out <- if(partial){
+        out <- out[grep(ti, ref, ignore.case = TRUE),]
+      } else {
+        out <- out[tolower(ti)==tolower(ref),]
+      }
+    }
+  }
+  #out <- PROFILES[grep(term, PROFILES[[by]], ignore.case = TRUE), ]
+  out <- rsp_build_respeciate(out)
+  return(out)
+}
+
+#' @rdname sp.info
+#' @export
+
+#wrapper for above
+
+sp_find_species <- function(...){
+  sp_species_info(...)
+}
diff --git a/R/sp.match.R b/R/sp.match.R
new file mode 100644
index 0000000..653c415
--- /dev/null
+++ b/R/sp.match.R
@@ -0,0 +1,323 @@
+#' @name sp.match
+#' @title Find nearest matches from reference set of profiles
+#' @aliases sp_match_profile
+
+#' @description \code{sp_match_profile} compares a supplied species
+#' (re)SPECIATE profile and a reference set of supplied profiles and
+#' attempt to identify nearest matches on the basis of correlation
+#' coefficient.
+#' @param x A \code{respeciate} object or similar \code{data.frame} containing
+#' a species profile to be compared with profiles in \code{ref}. If \code{x}
+#' contains more than one profile, these are averaged (using
+#' \code{\link{sp_average_profile}}), and the average compared.
+#' @param ref A \code{respeciate} object, a \code{data.frame} containing a
+#' multiple species profiles, to be used as reference library when identifying
+#' nearest matches for \code{x}.
+#' @param matches Numeric (default 10), the maximum number of profile matches to
+#' report.
+#' @param rescale Numeric (default 5), the data scaling method to apply before
+#' comparing \code{x} and profiles in \code{ref}: options 0 to 5 handled by
+#' \code{\link{sp_rescale}}.
+#' @param min.n \code{numeric} (default 8), the minimum number of paired
+#' species measurements in two profiles required for a match to be assessed.
+#' See also \code{\link{sp_species_cor}}.
+#' @param test.x Logical (default FALSE). The match process self-tests by adding
+#' \code{x} to \code{ref}, which should generate a perfect fit=1 score. Setting
+#' \code{test.x} to \code{TRUE} retains this as an extra record.
+#' @return \code{sp_match_profile} returns a fit report: a \code{data.frame} of
+#' up to \code{n} fit reports for the nearest matches to \code{x} from the
+#' reference profile data set, \code{ref}.
+
+#NOTE
+
+#' @rdname sp.match
+#' @export
+
+######################
+#match
+#find ref profile 'most similar to x
+######################
+
+#  the sp_dcast uses data.table
+#      sp_match uses rbindlist from data.table
+
+#in development
+
+#to do
+#########################
+
+##aa <- sp_profile(sp_find_profile("composite", by="profile_name"))
+##sp_match_profile(sp_profile("41220C"), aa)
+##assuming 41220C exists
+
+##NOTE sp_profile code is case sensitive
+##   not sure if we can fix this??
+
+
+#to think about
+#########################
+
+#match is giving warning
+#   In min(WEIGHT_PERCENT, na.rm = TRUE) :
+#   no non-missing arguments to min; returning Inf
+#       when (I guess) nothing there to compare...
+
+#default for ref
+#    using sp_profile(sp_find_profile("composite", by="profile_name"))
+#        in example.
+
+#could add error if x is more than one profile
+#    could use sp_profile_mean when written if we want to force to one
+#         one profile [?? nb: that function name not decided yet]
+
+#do we want an output option?
+#    have a basic report table but could
+
+#option to exclude test? from report
+
+#how can users make x if not from (re)SPECIATE archive
+#    currently using rsp_ [code after this function]
+#         to anonymise a speciate profile
+#    suggestion:
+#         identify needed columns, formats and names
+#         maybe can build from at least:
+#             species_name and species_id
+#             weight_percent or .value
+
+#renaming appears to trip up when profile_name/id are factors
+#    current fix is to force .tmp.pf.cd and .tmp.pf.nm to character
+#          when extracting them...
+#    maybe think about better?
+
+#at the moment we error (stop()) function if no correlation outputs
+#    so far this appears to be mostly because number of species in x
+#         is less than min.bin for correlation.
+#              1. this is currently hard-coded
+#              2. not really looked at this in detail...
+#                  could return a NULL and warning instead?
+#                  could also do this earlier if min.bin set in formals
+#                     but might need to rethink n, min.bin, etc???
+
+sp_match_profile <- function(x, ref, matches=10, rescale=5,
+                             min.n=8, test.x=FALSE){
+
+  #######################
+  #if ref missing
+  ##################
+  #to do
+  #   using sp_profile(sp_find_profile("composite", by="profile_name"))
+  #   looked promising
+
+  #add .value if not there
+  x <- rsp_tidy_profile(x)
+
+  #tidy x for testing
+  #.x.pr.cd <- as.character(x$PROFILE_CODE)
+  #.x.pr.nm <- as.character(x$PROFILE_NAME)
+
+  #note
+  #   assuming only one profile
+  #   might think about changing this in future
+
+  if(length(unique(x$PROFILE_CODE))>1){
+    x <- sp_average_profile(x, code = "test")
+  } else {
+    x <- sp_average_profile(x, code = "test",
+                            name = paste("test>", x$PROFILE_NAME[1], sep=""))
+  }
+
+  #note
+  #    dropped the force option
+  #    to override min.n if < min.n species in x
+  #if(length(unique(x$SPECIES_ID)) < min.n & force){
+  #  min.n <- length(unique(x$SPECIES_ID))
+  #}
+
+  ###############
+  #do test anyway
+  ###############
+  #if(test.x){
+    matches <- matches + 1
+  #}
+
+  x <- as.data.table(x)
+  ref <- as.data.table(ref)
+  .tmp <- rbindlist(list(x, ref), fill=TRUE)
+
+  #################
+  #think about this
+  #################
+
+  #no normalisation option is method = 0 (via sp_profile_rescale)
+
+  #note: think about calling this rescale because it is the rescale method
+  #      (not match method) we are setting
+
+  #rescale data
+  ################################
+  #note: currently outputting data.frame
+  #      (idea is we are not restricting the user to data.table)
+  #      (but it means we have to reset and I am guessing there is
+  #       a time/memory penalty for that??)
+
+  #might handle sp_rescale_species in/output
+  #   either returns object in class it is given
+  #      or errors if not respeciate
+  #      or errors if not respeciate unless forced
+  #          but then maybe need to check requires
+  #          cols are there???
+
+  .tmp <- as.data.table(sp_rescale_species(.tmp, method=rescale))
+
+  ###################
+  #keep species names and ids for renaming
+  #    as.character to stop rename tripping on factors
+  #         not ideal...
+  ###################
+  .tmp.pr.nm <- as.character(.tmp$PROFILE_NAME)
+  .tmp.pr.cd <- as.character(.tmp$PROFILE_CODE)
+
+
+  ##################
+  #dcast to reshape for search
+  #    cols (profiles), rows(species)
+  ##################
+
+  #testing: using sp_profile_dcast
+  #         code is set up for sp_profile_dcast(.tmp, widen="profile")
+
+  ##previous code
+  #.tmp <- dcast(.tmp,
+  #              SPECIES_ID + SPECIES_NAME ~PROFILE_CODE,
+  #              mean,
+  #              na.rm=TRUE,
+  #              value.var = ".value")
+
+  .tmp <- as.data.table(sp_dcast(.tmp, widen="profile"))
+
+  #nb: need the as.data.table() because sp_profile_dcast
+  #    currently returns data.frame
+  #    we are using data.table but I am trying to avoid forcing
+  #    users into doing the same...
+
+  #to think about:
+  #    an as.is option in functions to made outputs same as inputs?
+  #    disable the 'as is' assumption in previous code
+  #        BUT now not sure why...
+
+  #ignore first two columns
+  #    species_id and species_name
+  .cols <- names(.tmp)[3:(ncol(.tmp))]
+
+  #get x back as a test case...
+  #need a better way to handle test
+  .test <- .tmp[, "test"]
+
+  ###########################
+  #fit term
+  #    currently using conventional regression coefficient
+
+  #nb: we see a warning
+  #     think it is just cases when no variance in the xy data
+  #          so only one pair or (all xs same and all ys sames)???
+
+  #########################
+  #min n
+  #########################
+  #to do
+  #   compare this and code in sp_species_cor
+  #   if/when we deal with this stop message this code may need to be updated
+
+  f <- function(x) {
+    if(length(x[!is.na(x) & !is.na(.test)])>min.n){
+      suppressWarnings(cor(x, .test, use ="pairwise.complete.obs"))
+    } else {
+      NA
+    }
+  }
+  .out <- .tmp[, (.cols) := lapply(.SD, f), .SDcols = .cols]
+
+  ##########################
+  #chop down to build report
+  ##########################
+
+  #wonder if above is non-ideal??
+  #    rows are replicates
+  #    might be a better way of doing this???
+
+  .out <- as.data.frame(.out[1, -1:-2])
+  .out <- sort(unlist(.out), decreasing = TRUE)
+  if(length(.out) > matches){
+    .out <- .out[1:matches]
+  }
+
+  #could be better way to do next bit
+  #just making a lookup for profile info
+
+  #this is a pain because some profile names are replicated
+  #    (several profile codes seem to have same profile name)
+
+  if(length(.out)<1){
+    #see notes....
+    #sometimes this is because there are less than min.n species in the x profile
+    stop("sp_match_profile> No (", min.n, " point) matches for x", call. = FALSE)
+  }
+
+  .tmp <- names(.out)
+  for(i in 1:length(.tmp)){
+    if(.tmp[i] %in% .tmp.pr.cd){
+      .tmp[i] <- .tmp.pr.nm[.tmp.pr.cd == names(.out)[i]][1]
+    }
+  }
+
+  #row.names fulled form .out if you don't overwrite
+
+  .out <- data.frame(PROFILE_CODE=names(.out),
+                     PROFILE_NAME=.tmp,
+                     fit=.out,
+                     row.names = 1:length(.out))
+
+  #conflicted!!!
+  if(!test.x){
+    matches <- matches - 1
+    if("test" %in% x$PROFILE_CODE){
+      .out <- .out[tolower(.out$PROFILE_CODE)!="test",]
+    }
+    if(nrow(.out) > (matches)){
+      .out <- .out[1:matches,]
+    }
+  }
+
+
+
+  #######################
+  #output
+  #######################
+
+  #think about options?
+  rownames(.out) <- NULL
+  return(as.data.frame(.out))
+}
+
+
+
+
+
+#need something to replace this that helps users build local profiles
+
+#basic build needs
+#   profile_name and profile_code
+#   species_name and species_id
+#   weight_percent (and possibly .value)
+
+rsp_ <- function(x){
+  .o <- sp_profile(x)
+  .o$PROFILE_NAME <- paste("test", .o$PROFILE_NAME, sep=">")
+  .o$PROFILE_CODE <- "test"
+  .o
+}
+
+
+
+
+
diff --git a/R/sp.pad.R b/R/sp.pad.R
new file mode 100644
index 0000000..2708923
--- /dev/null
+++ b/R/sp.pad.R
@@ -0,0 +1,228 @@
+#' @name sp.pad
+#' @title (re)SPECIATE profile padding functions
+#' @aliases sp_pad
+
+#' @description Functions for padding \code{respeciate} objects.
+
+#' @description \code{sp_pad} pads a supplied (re)SPECIATE profile data set
+#' with profile and species meta-data.
+#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+#' profiles.
+#' @param pad character, type of meta data padding, current options
+#' \code{'profile'}, \code{'species'}, \code{'weight'}, \code{reference},
+#' \code{'standard'} (default; all but \code{'reference'}), and \code{'all'}
+#' ('all').
+#' @param drop.nas logical, discard any rows where \code{WEIGHT_PERCENT} is
+#' \code{NA}, default \code{TRUE}.
+#' @return \code{sp_pad} returns \code{x}, with requested additional profile
+#' and species meta-data added as additional \code{data.frame} columns.
+#' See Note.
+#' @note Some data handling can remove (re)SPECIATE meta-data,
+#' and \code{sp_pad}s provide a quick rebuild/repair. For example,
+#' \code{\link{sp_dcast}}ing to a (by-species or by-profile) widened
+#' form strips some meta-data, and padding is used as part of the
+#' \code{\link{sp_melt_wide}} and padding is used to re-add this meta-data
+#' when returning the data set to its standard long form.
+
+#NOTE
+
+#' @rdname sp.pad
+#' @export
+## #' @import data.table
+#   now done in xxx.r
+
+#in development
+
+## think about
+
+# improving the respeciate class handling
+# in -> dcast -> melt -> out
+
+#I do not think this needs the pad argument!!!!
+#    or we add an 'all' default???
+#        I think it just needs to check with common columns
+#            and merge by those
+#   testing this with current version of sp_pad
+#        true default is sp_pad(x, pad="standard")
+#            SO refs would be lost with dcast then melt
+#        could allow pad as logical or character
+
+
+# then a reset function would just remove a column and re-pad???
+
+# might want to specify data.frame::as.data.table???
+
+#think about a sp_repair_weight function
+
+#not sure we need drop.nas here...
+#    think melt is making them...
+
+#does this work for species_id and profile_name?
+#should it....
+
+#need to think about respeciate object class
+#class have
+#      "respeciate", ".rsp"
+#      "respeciate", ".rsp.wp"
+#      "respeciate", ".rsp.ws"
+#      "respeciate", ".rsp.pi"
+#      "respeciate", ".rsp.si"
+# with the second not being defined???
+#    assuming this does cause a build problem
+#    it would just be an identifier for the respeciate object type
+#        standard (long), wide by profile, wide by species,
+#        profile info, species info...
+
+
+#test
+#b <- sp_dcast_species(spq_pm.ae8())
+#c <- b[c("PROFILE_CODE", "SPECIES_NAME", "WEIGHT_PERCENT")]
+#x <- sp_pad(c)
+
+
+sp_pad <- function(x, pad = "standard", drop.nas = TRUE){
+
+
+  #should pad allow TRUE/FALSE???
+  #should argument within sp_pad be method??
+
+  #tidy x
+  x <- rsp_tidy_profile(x)
+  #save class
+  .cls <- class(x)
+  out <- as.data.table(x)
+
+  #profile
+  if(any(c("profile", "profiles", "standard", "all") %in% tolower(pad))){
+    PROFILES <- as.data.table(sysdata$PROFILES)
+    .tmp <- intersect(names(out), names(PROFILES))
+    if(length(.tmp)>0){
+      out <- merge(out, PROFILES, by = .tmp, all.y=FALSE,
+                   all.x=TRUE, allow.cartesian=TRUE)
+    }
+  }
+
+  #species
+  if(any(c("species", "standard", "all") %in% tolower(pad))){
+    SPECIES_PROPERTIES <- as.data.table(sysdata$SPECIES_PROPERTIES)
+    .tmp <- intersect(names(out), names(SPECIES_PROPERTIES))
+    if(length(.tmp) >0){
+      out <- merge(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE,
+                   all.x=TRUE, allow.cartesian=TRUE)
+    }
+  }
+
+  #species weights
+  if(any(c("weight", "weights", "standard", "all") %in% tolower(pad))){
+    SPECIES <- as.data.table(sysdata$SPECIES)
+    .tmp <- intersect(names(out), names(SPECIES))
+    if(length(.tmp) >0){
+      out <- merge(out, SPECIES, by = .tmp, all.y=FALSE,
+                   all.x=TRUE, allow.cartesian=TRUE)
+    }
+  }
+
+  #references
+  if(any(c("reference", "references", "all") %in% tolower(pad))){
+    PROFILE_REFERENCE <- as.data.table(sysdata$PROFILE_REFERENCE)
+    REFERENCES <- as.data.table(sysdata$REFERENCES)
+    .tmp <- intersect(names(out), names(PROFILE_REFERENCE))
+    if(length(.tmp) >0){
+      out <- merge(out, PROFILE_REFERENCE, by = .tmp, all.y=FALSE,
+                   all.x=TRUE, allow.cartesian=TRUE)
+    }
+    .tmp <- intersect(names(out), names(REFERENCES))
+    if(length(.tmp) >0){
+      out <- merge(out, REFERENCES, by = .tmp, all.y=FALSE,
+                   all.x=TRUE, allow.cartesian=TRUE)
+    }
+  }
+
+  #drop.nas.
+  if(drop.nas){
+    out <- out[!is.na(out$WEIGHT_PERCENT),]
+  }
+
+  #sp_profile reorders
+  #   not sure if it is a good idea but could add as option
+  #      here would be
+  #      out <- out[order(out$PROFILE_CODE, decreasing = FALSE),]
+
+  #not sure how to handle output...
+  # could return as input class
+  # see notes
+  out <- as.data.frame(out)
+  rsp_build_respeciate(out)
+
+}
+
+
+
+############################
+#not exporting
+############################
+
+#earlier versions
+
+#holding until testing on new code finished
+
+sp_pad.old <- function(x, pad = "species", drop.nas = TRUE){
+
+  #tidy x
+  x <- rsp_tidy_profile(x)
+  #save class
+  .cls <- class(x)
+  out <- as.data.table(x)
+
+  #set up padding for melts...
+  .long <- "nothing"
+  if(pad=="species"){
+    .long <- "SPECIES_NAME"
+  }
+  if(pad=="profile"){
+    .long <- "PROFILE_CODE"
+  }
+
+  PROFILES <- as.data.table(sysdata$PROFILES)
+  SPECIES_PROPERTIES <- as.data.table(sysdata$SPECIES_PROPERTIES)
+  if(.long=="PROFILE_CODE"){
+    #add in profile then species info
+    out <- merge(out, PROFILES, by = .long, all.y=FALSE,
+                 all.x=TRUE, allow.cartesian=TRUE)
+    .tmp <- intersect(names(out), names(SPECIES_PROPERTIES))
+    out <- merge(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE,
+                all.x=TRUE, allow.cartesian=TRUE)
+  }
+  if(.long=="SPECIES_NAME"){
+    #add in species then profiles info
+    out <- merge(out, SPECIES_PROPERTIES, by = .long, all.y=FALSE,
+                 all.x=TRUE, allow.cartesian=TRUE)
+    .tmp <- intersect(names(out), names(PROFILES))
+    out <- merge(out, PROFILES, by = .tmp, all.y=FALSE,
+                 all.x=TRUE, allow.cartesian=TRUE)
+  }
+  #to get weight_percentage etc
+  if(pad %in% c("species", "profile", "weight")){
+    SPECIES <- as.data.table(sysdata$SPECIES)
+    .tmp <- intersect(names(out), names(SPECIES))
+    out <- merge(out, SPECIES, by = .tmp, all.y=FALSE,
+                 all.x=TRUE, allow.cartesian=TRUE)
+    } else {
+    #not great but...
+    #if not padding WEIGHT_PERCENT has to be .value
+    out$WEIGHT_PERCENT <- out$.value
+  }
+  #drop.nas.
+  if(drop.nas){
+    out <- out[!is.na(out$WEIGHT_PERCENT),]
+  }
+
+  #not sure how to handle output...
+  #see notes
+  out <- as.data.frame(out)
+  rsp_build_respeciate(out)
+}
+
+
+
+
diff --git a/R/sp.pls.R b/R/sp.pls.R
new file mode 100644
index 0000000..e72ffbb
--- /dev/null
+++ b/R/sp.pls.R
@@ -0,0 +1,1045 @@
+#' @name sp.pls
+#' @title (re)SPECIATE profile Positive Least Squares
+#' @aliases sp_pls_profile pls_report pls_refit_species pls_fit_parent
+#' pls_plot pls_plot_species pls_plot_profile
+
+#' @description Functions for Positive Least Squares (PSL) fitting of
+#' (re)SPECIATE profiles
+
+#' @description
+#' \code{sp_pls_profile} builds PSL models for supplied profile(s) using
+#' the \code{\link{nls}} function, the 'port' algorithm and a lower
+#' limit of zero for all model outputs to enforce the positive fits. The
+#' modeled profiles are typically from an external source, e.g. a
+#' measurement campaign, and are fit as a linear additive series of reference
+#' profiles, here typically from (re)SPECIATE, to provide a measure of
+#' source apportionment based on the assumption that the profiles in the
+#' reference set are representative of the mix that make up the modeled
+#' sample. The \code{pls_} functions work with \code{sp_pls_profile}
+#' outputs, and are intended to be used when refining and analyzing
+#' these PLS models. See
+
+#' @param x A \code{respeciate} object, a \code{data.frame} of
+#' profiles in standard long form, intended for PLS modelling.
+#' @param ref A \code{respeciate} object, a \code{data.frame} of
+#' profiles also in standard long form, used as the set of candidate
+#' source profiles when fitting \code{x}.
+#' @param power A numeric, an additional factor to be added to
+#' weightings when fitting the PLS model. This is applied in the form
+#' \code{weight^power}, and increasing this, increases the relative
+#' weighting of the more heavily weighted measurements. Values in the
+#' range \code{1 - 2.5} are sometimes helpful.
+#' @param ... additional arguments, currently ignored.
+#' @param pls A \code{sp_pls_profile} output, only used by \code{pls_}
+#' functions.
+#' @param name Character class, for \code{pls_refit_species} only, the
+#' name of the species to refit. All other species in the reference
+#' profiles are held 'as is' and the \code{name}d species is refit based on
+#' previous PLS estimated source contribution time-series, then these are
+#' in turn refit using the new profiles generated by re-distributing the
+#' \code{name}d species.
+#' @param parent (for \code{pls_fit_parent} only) a data.frame of measurements
+#' of an additional species. This is treated as the sample parent (or a proxy
+#' for this), e.g. the total particulate matter (PM) measurement for
+#' (percentage) PM profiles. It is added to \code{x}, dummy entries are added
+#' to \code{ref} with a start value of 100 (percent), and the model is refitted.
+#' @param start,lower,upper all numeric, the start value and lower and upper
+#' boundaries when fitting a parameter.
+#' @param species,profile (for \code{pls_plot}s only) numeric or character
+#' identifying the species or profile to plot. If numeric, these are treated
+#' as indices of the species or profile, respectively, in the PLS model; if
+#' character, species is treated as the name of species and profile is treated
+#' as the profile code. Both can be concatenation to product multiple plots and
+#' the special case \code{-1} is a short cut to all species or profiles,
+#' respectively.
+#' @param type (for \code{pls_plot}s only) numeric, the plot type if
+#' multiple options are available.
+#' @param log (for \code{pls_plot_profile} only) logical, if \code{TRUE} this
+#' applies 'log' scaling to the primary Y axes of the plot.
+
+#########################
+# need to check terminology for this...
+#      The zero handling is a based on offset in plot(..., log="y", off.set)
+#      but automatically estimated...
+
+#' @return \code{sp_pls_profile} returns a list of nls models, one per
+#' profile/measurement set in \code{x}. The \code{pls_} functions work with
+#' these outputs. \code{pls_report} generates a \code{data.frame} of
+#' model outputs, and is used of several of the other \code{pls_}
+#' functions. \code{pls_refit_species} and \code{pls_fit_parent}
+#' return the supplied \code{sp_pls_profile} output, updated on the
+#' basis of the \code{pls_} function action. \code{pls_plot}s produce
+#' various plots commonly used in source apportionment studies.
+
+#' @note This implementation of PLS applies the following modeling constraints:
+#'
+#' 1. The generate a model of \code{x} that is positively constained linear
+#' product of the profiles in \code{ref}, so outputs can only be
+#' zero or more.  Although the model is generated using \code{\link{nls}},
+#' which is a Nonlinear Least Squares (NLS) model, the fitting term applied
+#' in this case is linear.
+#'
+#' 2. The number of species in \code{x} must be more that the number of
+#' profiles in \code{ref}, to reduce the likelihood of over-fitting.
+#'
+#'
+
+# GENERAL NOTES
+
+# TO DO
+# link to CMB as crude form of CMB and reference?
+
+# these all need code tidying
+
+# check individual function notes
+
+
+############################
+############################
+## sp_pls_profile
+############################
+############################
+
+#' @rdname sp.pls
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+#This is version 2
+
+#version 1 combined version2 and pls_report
+#now separated because it simplified pls model reworking
+
+#currently keeping the function args
+#   might not need to do this BUT
+#   model does not seem to be tracking them when ...
+
+# check power handling is right
+
+sp_pls_profile <- function(x, ref,
+                            power = 1,
+                            ...){
+
+  ##################
+  #from rough code
+  ##################
+
+  ########################
+  #only allowing profiles < species
+  if(length(unique(ref$PROFILE_CODE)) >= length(unique(x$SPECIES_ID))){
+    stop("sp_pls: need #.species > #.profiles, more species or less profiles??",
+         call. = FALSE)
+  }
+
+  x.args <- list(...)
+
+  ####################
+  #make sure we only have one species / profile
+  ####################
+  #tidying
+  .pr.cd <- unique(x$PROFILE_CODE)
+  ##  .xx <- respeciate:::rsp_tidy_profile(x)
+  .xx <- lapply(.pr.cd, function(y){
+    .x <- x[x$PROFILE_CODE==y,]
+    .x <- sp_average_profile(.x, y, .x$PROFILE_NAME[1])
+    .x
+  })
+  .xx <- data.table::rbindlist(.xx)
+  #should be same! redundant
+  .pr.cd <- unique(.xx$PROFILE_CODE)
+
+  ####################
+  #reduce ref to just species in x
+  ###################
+  #no point to look at any species not in x
+  ref <- subset(ref, SPECIES_ID %in% unique(.xx$SPECIES_ID))
+
+  ###################
+  #nudge
+  ###################
+  #dropping nudge from version 2
+  ##
+  #nb: method was nudge before analysis
+  #and a nudge back after
+  #   nudge(identified.species)->pls->report->nudge back(identified.species)
+
+  #if(!is.null(nudge)){
+  #  for(i in nudge){
+  #    #ref might have both WEIGHT_PERCENT and .value
+  #    ref[ref$SPECIES_NAME==i, "WEIGHT_PERCENT"] <-
+  #      ref[ref$SPECIES_NAME==i, "WEIGHT_PERCENT"] * 10
+  #    .xx[.xx$SPECIES_NAME==i, "WEIGHT_PERCENT"] <-
+  #      .xx[.xx$SPECIES_NAME==i, "WEIGHT_PERCENT"] * 10
+  #    .xx[.xx$SPECIES_NAME==i, ".value"] <-
+  #      .xx[.xx$SPECIES_NAME==i, ".value"] * 10
+  #  }
+  #}
+
+  ##############################
+  #main step/ once per profile
+  ##############################
+  #can we replace this with data.table
+  ans <- lapply(.pr.cd, function(y){
+    .test <- try({
+      #need to try this because it does not always work
+      .x <- as.data.frame(.xx[.xx$PROFILE_CODE==y,])
+      .x <- sp_average_profile(.x, "test", "1_test")
+
+      #might not need one of this-and-same-above
+      #might be better doing it here...
+      .tmp <- subset(ref, ref$SPECIES_ID %in% unique(.x$SPECIES_ID))
+
+      #could change this with rbindlist version??
+      .ref <- intersect(names(.x), names(.tmp))
+      .out <- rbind(.x[.ref], .tmp[.ref])
+      .out <- sp_dcast_profile(.out)
+
+      #build formula and model args
+      .tmp <- names(.out)
+      .tmp <- .tmp[!.tmp %in% c("SPECIES_ID", "SPECIES_NAME", "test")]
+      #zero cases for port function
+      .ls <- paste("m_", .tmp, sep="")
+      .ls2 <- lapply(.ls, function(x){0})
+      names(.ls2) <- .ls
+      .for <- paste("(m_", .tmp, "*`", .tmp, "`)", sep="", collapse = "+")
+      .for <- as.formula(paste("test~", .for))
+      .wt <- 1/.out$test
+      ############################
+      #note
+      ############################
+      #nls wants lower and upper as vectors
+      #but seems to handle lists
+      #   should check how this is done?
+      #   might not translate sesnibly...
+      #   pass upper, default INF???
+      args <- list(formula = .for,
+                   data=.out,
+                   start=.ls2,
+                   lower=.ls2,
+                   weights=.wt,
+                   algorithm="port",
+                   control=nls.control(tol=1e-5))
+      args <- modifyList(args, x.args[names(x.args) %in% names(args)])
+      args$weights <- args$weights^power
+      x.args <- list(power=power)
+
+      #run nls/pls
+      #####################
+      mod <- do.call(nls, args)
+#      mod <- nls(.for, data=.out,
+#                 weights = (1/.out$test)^power, # think about weighting
+#                 start=.ls2, lower=.ls2,
+#                 algorithm="port",
+#                 control=nls.control(tol=1e-5) #think about tolerance
+#      )
+
+      #if we need to calculate AIC on a case-by-case basis...
+      #for model, I think we need to use stats:::logLik.nls for AIC calc...
+      #see
+      #https://stackoverflow.com/questions/39999456/aic-on-nls-on-r
+      #(currently calculating AIc on the lm model on the overall fit on
+      # all species in all profiles as part of pls_report)
+
+      ###################################
+      #currently all-data stats in pls_report
+      #  and returning list of models
+      ###################################
+      ##.tmp <- summary(mod)$coefficients
+      ##.p.mod <- .tmp[,4]
+      ##names(.p.mod) <- gsub("m_", "p_", names(.p.mod))
+      ##.out <- data.frame(PROFILE_CODE = y,
+      ##           t(.tmp[,1]),
+      ##           t(.p.mod))
+      ##.out
+
+      #output list of mod + data
+      ################################
+      #could add args?
+      #  then drop power from pls_ function formals
+      #       or allow as an overwrite only...
+      list(mod=mod,          #model outputs
+           args=args,        #model args
+           x.args=x.args)    #rsp args
+    }, silent = TRUE)
+    if(class(.test)[1]=="try-error"){
+      NULL
+    } else {
+      .test
+    }
+  })
+  names(ans) <- .pr.cd
+
+  #returns the list of nls models
+  #(assuming all viable, one per profile_code in x)
+
+  #to think about object type???
+  return(ans)
+
+}
+
+
+#############################
+#############################
+## pls_report
+#############################
+#############################
+
+#' @rdname sp.pls
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+# this is the model report table
+# other pls_ functions use output
+#    so take care if changes
+
+pls_report <- function(pls){
+
+  ans <- lapply(names(pls), function(x){
+    .xx <- pls[[x]]
+    .out <- .xx$args$data
+    .tmp <- summary(.xx$mod)$coefficients
+    .p.mod <- .tmp[,4]
+    names(.p.mod) <- gsub("m_", "p_", names(.p.mod))
+    .out <- data.frame(PROFILE_CODE = x,
+                       .out,
+                       t(.tmp[,1]),
+                       t(.p.mod),
+                       pred = predict(.xx$mod, newdata=.xx$args$data),
+                       check.names=FALSE)
+    .out
+  })
+  ans <- data.table::rbindlist(ans, use.names=TRUE, fill=TRUE)
+  #####################
+  #think about
+  #####################
+  #    adding x_[profile] (m_[profile] * profile) calculations here
+  #    currently done on fly in some plots...
+  ans$.value <- ans$test
+  mod2 <- lm(pred~.value, data=ans)
+  ans$adj.r.sq <- summary(mod2)$adj.r.squared
+  ans$slope <- summary(mod2)$coefficients[2,1]
+  ans$p.slope <- summary(mod2)$coefficients[2,4]
+  ans$intercept <- summary(mod2)$coefficients[1,1]
+  ans$p.intercept <- summary(mod2)$coefficients[1,4]
+  ###########
+  #(also noted in sp_pls_profile)
+  #if we need to calculate it on a case-by-case basis...
+  #need to read this:
+  #https://stackoverflow.com/questions/39999456/aic-on-nls-on-r
+  #see stats:::logLik.nls for AIC calc...
+  ans$AIC <- AIC(mod2)
+
+  as.data.frame(ans)
+}
+
+
+
+###########################
+###########################
+## pls_refit_species
+###########################
+###########################
+
+#' @rdname sp.pls
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+# like to drop power from formals
+#     maybe ignore or pass overwrites via ...?
+
+# need to update the model handling so it is like sp_pls_profile
+#     this would sort power issue above
+#          also means the user can change setting themselves
+#          THINK ABOUT THIS
+#               they could make a pls that was not positively constrained
+
+
+
+pls_refit_species <- function(pls, name, power=1,
+                              ...){
+  .xx <- pls_report(pls)
+  #name might want to be case-non-sensitive at some point
+  #think about how to do this one...
+  .data <- .xx[.xx$SPECIES_NAME==name,]
+  #get and hold all the m_ values
+  #update profile contributions for named species
+  .ms <- names(.data)[grepl("^m_", names(.xx))]
+  .xs <- gsub("^m_", "", .ms)
+  .for <- paste("(`", .ms, "`*`", .xs, "`)",
+                sep="", collapse = "+")
+  .for <- as.formula(paste("test~", .for))
+  .da <- .data[!names(.data) %in% .xs]
+
+
+  .ls <- lapply(.xs, function(x){0})
+  names(.ls) <- .xs
+
+  #################
+  #user might want to set this???
+
+  .ls2 <- lapply(.xs, function(x){.data[1, x]})
+  names(.ls2) <- .xs
+
+  mod <- nls(.for, data=.da,
+             #weights = 1/(.out$test^push), # think about weighting
+             start=.ls2, lower=.ls,
+             algorithm="port",
+             control=nls.control(tol=1e-5) #think about tolerance
+  )
+
+  .data[.xs] <- data.frame(t(coefficients(mod)))
+
+  #lazy
+  .ans <- .data
+
+  for(i in .ans$PROFILE_CODE){
+    .ii <- subset(.ans, PROFILE_CODE==i)
+    .ii <- .ii[names(.ii) %in% names(pls[[i]]$args$data)]
+    .sp.ord <- unique(pls[[i]]$args$data$SPECIES_ID)
+    pls[[i]]$args$data <- subset(pls[[i]]$args$data, SPECIES_NAME!=name)
+    pls[[i]]$args$data <- rbind(pls[[i]]$args$data, .ii)
+    #put back in right order
+    pls[[i]]$args$data <-
+      pls[[i]]$args$data[order(ordered(pls[[i]]$args$data$SPECIES_ID,
+                         levels=.sp.ord)),]
+    #rebuild model
+    .for <- as.character(formula(pls[[i]]$mod))
+    .for <- as.formula(paste(.for[2], .for[1], .for[3], sep=""))
+    .ms <- names(pls[[i]]$args$data)
+    .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")]
+    .ls <- lapply(.ms, function(x){0})
+    names(.ls) <- paste("m_", .ms, sep="")
+    .da <- pls[[i]]$args$data
+
+    pls[[i]]$mod <- nls(.for, data=.da,
+                        weights = (1/.da$test)^power, # think about weighting
+                        start=.ls, lower=.ls,
+                        algorithm="port",
+                        control=nls.control(tol=1e-5,
+                                            warnOnly = TRUE) #think about tolerance
+    )
+  }
+
+  pls
+
+}
+
+
+
+
+####################################
+####################################
+## pls_fit_parent
+####################################
+####################################
+
+
+#' @rdname sp.pls
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+#############################
+#this needs a lot of work
+#############################
+
+
+# (like pls_refit_species)
+# like to drop power from formals
+#   maybe ignore or pass overwrites via ...?
+
+# need to update the model handling so it is like sp_pls_profile
+#     this would sort power issue above
+#          also means the user can change setting themselves
+#          THINK ABOUT THIS
+#               they could make a pls that was not positively constrained
+#      this would also remove the start, lower and upper options
+#           from the formals...
+
+# parent could already be in x
+#    then parent could just be the name of parent???
+
+# also a case for using this to add a non-parent to x
+#    e.g. pls_fit_unknown_species...
+#    to fit a species to the existing model as a source apportion of
+#        that unknown...
+#    in which case maybe this should just be a wrapper for that
+#        with the start, lower and upper like below
+
+# if we are setting start and lower
+#     start = lower if start is missing might be safer...
+
+
+pls_fit_parent <- function(pls, parent, power=1,
+                           start=100,
+                           lower=50, upper=200, ...){
+
+  .out <- pls_report(pls)
+  #parent should only have one species
+  #and have same profiles as pls model data
+  #and its contribution to all sources is set by .value
+  .out <- subset(.out, SPECIES_ID == unique(.out$SPECIES_ID)[1])
+  .test <- c("PROFILE_CODE", ".value", "WEIGHT_PERCENT")
+  .test <- names(parent)[names(parent) %in% .test]
+  .data <- parent[.test]
+  names(.data)[2] <- "parent"
+  .data <- merge(.out, .data[c(1:2)])
+
+  #formula
+  .ms <- names(.data)[grepl("^m_", names(.out))]
+  .for <- paste("(`", .ms, "`*`", gsub("^m_", "n_", .ms), "`)",
+                sep="", collapse = "+")
+  .for <- as.formula(paste("parent~", .for))
+
+  .ns <- .ms
+  names(.ns) <- gsub("^m_", "n_", .ms)
+  .ls <- lapply(.ns, function(x){start})
+  .ls2 <- lapply(.ns, function(x){lower})
+  .ls3 <- lapply(.ns, function(x){upper})
+
+  mod <- nls(.for, data=.data,
+             #weights = (1/.out$test)^power, # think about weighting
+             start=.ls,
+             lower=.ls2,
+             upper=.ls3,
+             algorithm="port",
+             control=nls.control(tol=1e-5) #think about tolerance
+  )
+  .ans <- data.frame(
+    PROFILE_CODE = .data$PROFILE_CODE,
+    SPECIES_ID = parent$SPECIES_ID[1],
+    SPECIES_NAME = parent$SPECIES_NAME[1],
+    t(coefficients(mod)),
+    test = .data$parent
+  )
+  names(.ans) <- gsub("^n_", "", names(.ans))
+  for(i in .ans$PROFILE_CODE){
+    .ii <- subset(.ans, PROFILE_CODE==i)
+    .ii <- .ii[names(.ii) != "PROFILE_CODE"]
+    pls[[i]]$args$data <-
+      rbind(pls[[i]]$args$data, .ii)
+    #rebuild model
+    .for <- as.character(formula(pls[[i]]$mod))
+    .for <- as.formula(paste(.for[2], .for[1], .for[3], sep=""))
+    .ms <- names(pls[[i]]$args$data)
+    .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")]
+    .ls <- lapply(.ms, function(x){0})
+    names(.ls) <- paste("m_", .ms, sep="")
+    .da <- pls[[i]]$args$data
+
+    pls[[i]]$mod <- nls(.for, data=.da,
+                        weights = (1/.da$test)^power, # think about weighting
+                        start=.ls, lower=.ls,
+                        algorithm="port",
+                        control=nls.control(tol=1e-5) #think about tolerance
+    )
+  }
+
+  pls
+
+}
+
+
+####################################
+###################################
+## pls_plots
+###################################
+###################################
+
+#these are all draft
+
+
+####################################
+####################################
+## pls_plot
+####################################
+####################################
+
+
+#' @rdname sp.pls
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+#############################
+#this needs a lot of work
+#############################
+
+# this uses unexported rsp_profile_pie function below...
+#     both pls_plot and rsp_profile_pie need work...
+
+
+pls_plot <- function(pls, species=1, type=1,...){
+
+  #main summary plot
+  #require(data.table)
+
+  #to do
+  ############################
+
+  #general
+
+  #tidy code;
+  #    this was put together fast...
+  #    lots of code can be tidied/simplified...
+
+  #needs external plot control...
+  #    arg passing to barplot in ...???
+  #    at least col control???
+
+  #type 1
+  ############################
+  #like x axes to look like a conventionally numeric
+  #like option for x axes to be mapped onto another
+  #       measurement
+  #       currently an index based on PROFILE_CODE
+  #            maybe as.x or map.x, etc...
+
+
+  #type =2 for pie plot...
+  #############################
+  #using modification of pie
+  #      unexported/below
+  #      see notes there...
+
+
+
+  dat <- pls_report(pls)
+  .sp.ref <- unique(dat$SPECIES_NAME)
+  if(missing(species)){
+    species <- .sp.ref[1]
+  }
+  if(is.numeric(species)){
+    if(all(species == -1)){
+      species <- .sp.ref
+    } else {
+      species <- .sp.ref[species]
+    }
+  }
+
+  .sp.ord <- unique(dat$SPECIES_ID) #only need this if I
+  .sp.m.pro <- names(dat)[grep("^m_", names(dat))]
+  .sp.pro <- gsub("^m_", "", .sp.m.pro)
+
+  #could put this is pls_report???
+  for(i in .sp.pro){
+    dat[, paste("x_", i, sep="")] <- dat[, paste("m_", i, sep="")] * dat[,i]
+  }
+
+  .sp.x.pro <- names(dat)[grep("^x_", names(dat))]
+
+  .rep <- dat[c("SPECIES_NAME", "SPECIES_ID", "PROFILE_CODE", .sp.x.pro)]
+  .rep <- melt(as.data.table(.rep),
+               id=c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE"))
+
+  .tot <- as.data.table(dat)
+  .cs <- c(".value", "pred", .sp.x.pro)
+  .tot <- .tot[, lapply(.SD, function(x) sum(x, na.rm=TRUE)),
+               .SDcols= .cs, by=c("SPECIES_ID", "SPECIES_NAME")]
+
+  for(i in species){
+
+    .rep2 <- as.data.frame(subset(.rep, SPECIES_NAME==i))
+    .rep2$.index <- as.character(.rep2$PROFILE_CODE)
+    .tot2 <- as.data.frame(subset(.tot, SPECIES_NAME==i))
+
+    if(1 %in% type){
+      .rep2$.index <- as.numeric(ordered(.rep2$.index, levels=unique(.rep2$.index)))
+      .rep2$.index <- ordered(.rep2$.index, levels=unique(.rep2$.index))
+      #.cols <- palette.colors(n = length(.sp.x.pro), palette = "Okabe-Ito",
+      #                        recycle = FALSE)
+      .cols <- heat.colors(n=length(.sp.x.pro))
+      .ncol <- ceiling(length(.sp.x.pro)/3)
+      .leg.text <- gsub("^x_", "", .sp.x.pro)
+      .bar <- barplot(value~variable + .index,  .rep2, col=.cols,
+                      main = i, ylab="Measurement",
+                      xlab="Sample [index]",
+                      legend.text=.leg.text,
+                      args.legend=list(cex=0.8,
+                                       ncol=.ncol))
+      .dat <- subset(dat, SPECIES_NAME==i)
+      lines(.bar, .dat$.value)
+    }
+    if(2 %in% type){
+      .cols <- heat.colors(n=length(.sp.x.pro))
+      .vals <- unlist((.tot2[,.sp.x.pro]/.tot2$.value) * 100)
+
+      .labs=paste(gsub("x_", "", .sp.x.pro), "\n", signif(.vals, 3) , "%", sep="")
+      print(sum(.vals))
+      rsp_profile_pie(.vals, col=.cols, edges=1000,
+                      labels=.labs, main=i)
+    }
+
+  }
+  #output
+  #not sure if dat, .rep or output is most useful??
+  invisible(.rep)
+}
+
+
+####################################
+####################################
+## pls_plot_species
+####################################
+####################################
+
+
+#' @rdname sp.pls
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+#############################
+#this needs a lot of work
+#############################
+
+
+pls_plot_species <- function(pls, species, type=1, ...){
+
+  #general
+
+  #tidy code;
+  #    this was put together fast...
+  #    lots of code can be tidied/simplified...
+
+  #needs external plot control...
+  #    arg passing to barplot in ...???
+  #    at least col control???
+
+  #type 1
+  ############################
+
+  #various pls model plots for species
+  dat <- pls_report(pls)
+  .sp.ref <- unique(dat$SPECIES_NAME)
+  if(missing(species)){
+    species <- .sp.ref[1]
+  }
+  if(is.numeric(species)){
+    if(all(species == -1)){
+      species <- .sp.ref
+    } else {
+      species <- .sp.ref[species]
+    }
+  }
+  for(i in species){
+    d2 <- subset(dat, SPECIES_NAME==i)
+    lims <- range(c(d2$.value, d2$pred))
+    mod <- lm(pred~0+.value, d2)
+    .sum <- paste("y = ", signif(summary(mod)$coefficients[1,1], 3),
+                  "x (adj.R2 = ", signif(summary(mod)$adj.r.squared, 3),
+                  ")", sep="")
+    if(1 %in% type){
+      plot(d2$.value, d2$pred, type="n",
+           main=i,
+           xlab="Measurement",
+           ylab="Model",
+           xlim=lims, ylim=lims)
+      grid()
+      abline(a=0, b=1, col="grey")
+      points(d2$.value, d2$pred)
+      abline(mod, col="red", lty=2)
+      text(lims[1], lims[2], .sum, adj=c(0,1), cex=0.75)
+    }
+    if(2 %in% type){
+      plot(d2$.value, type="n",
+           main=i,
+           ylab="Measurement",
+           xlab="Sample [index]",
+           ylim=lims)
+      lines(d2$.value)
+      lines(d2$pred, col="red")
+    }
+  }
+  invisible(NULL)
+}
+
+
+
+####################################
+####################################
+## pls_plot_profile
+####################################
+####################################
+
+
+#' @rdname sp.pls
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+#############################
+#this needs a lot of work
+#############################
+
+
+pls_plot_profile <- function(pls, profile, log=FALSE, ...){
+
+  #general
+
+  #tidy code;
+  #    this was put together fast...
+  #    lots of code can be tidied/simplified...
+
+  #needs external plot control...
+  #    arg passing to barplot in ...???
+  #    at least col control???
+
+  #type
+  ############################
+  #(currently only one so not needed...)
+  #
+
+  #log logical ONLY
+  ############################
+  #   very fiddly...
+  #
+
+  #require(data.table)
+
+  dat <- pls_report(pls)
+  #need to reset species order after calculating stats
+  #  think about making first half of this function a dedicated function??
+
+  #need to think this through
+  #  there is a lot here that is very painful...
+
+  #could pass plot commands via the ... args
+
+  .sp.ord <- unique(dat$SPECIES_ID)
+  .sp.m.pro <- names(dat)[grep("^m_", names(dat))]
+  .sp.pro <- gsub("^m_", "", .sp.m.pro)
+  if(missing(profile)){
+    profile <- .sp.pro[1]
+  }
+  if(is.numeric(profile)){
+    if(all(profile == -1)){
+      profile <- .sp.pro
+    } else {
+      profile <- .sp.pro[profile]
+    }
+  }
+  m_profile <- paste("m_", profile, sep="")
+
+  #just profiles asked for
+  dat <- dat[c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE",
+               profile, m_profile, "pred", ".value")]
+  #calculate percentage of species
+  for(i in profile){
+    dat[, paste("x_", i, sep="")] <- dat[, paste("m_", i, sep="")] * dat[,i]
+  }
+  .rep <- data.table::as.data.table(dat)
+  .cols <- c(".value", "pred", paste("x_", profile, sep=""))
+  .rep <- .rep[, lapply(.SD, function(x) sum(x, na.rm=TRUE)),
+               .SDcols= .cols, by=c("SPECIES_ID", "SPECIES_NAME")]
+  .rep <- as.data.frame(.rep)
+  for(i in profile){
+    .rep[, paste("pc_", i, sep="")] <- (.rep[, paste("x_", i, sep="")] / .rep$pred) * 100
+  }
+
+  dat <- dat[dat$PROFILE_CODE==unique(dat$PROFILE_CODE)[1],]
+  dat <- merge(.rep, dat[c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE", profile)],)
+  dat <- dat[order(ordered(dat$SPECIES_ID, levels=.sp.ord)), ]
+  rownames(dat) <- 1:nrow(dat)
+
+
+  for(i in profile){
+    d2 <- dat[c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE",
+                i, paste("pc_", i, sep=""))]
+
+    if(log){
+      #plotting first axis as logs
+      .f.cex = 0.8
+      .x <- d2$SPECIES_NAME
+      .m.x <- max(nchar(.x), na.rm=T)/1.3*.f.cex
+      .y <- d2[,i]
+      .min <- min(.y[.y>0], na.rm=TRUE)/2
+      .y[.y<.min] <- .min
+      .y[is.na(.y)] <- .min
+      .y <- log10(.y)
+      .y1.at <- pretty(.y)
+      .y1.at <- .y1.at[.y1.at == round(.y1.at)]
+      #need to go back in and recalculate min as
+      #bottom of pretty...
+      .y[.y==log10(.min)] <- .y1.at[1]
+
+      #.y1.lb <- as.character(10^(.y1.at))
+      .y1.lb <- c(format(10^(.y1.at), drop0trailing = TRUE, scientific=FALSE))
+      .y <- .y - min(.y1.at)
+      .y1.at <- .y1.at - min(.y1.at)
+
+      .yl <- "Source Loading"
+      .y2 <- d2[, paste("pc_", i, sep="")]
+      .y2 <- ((.y2/100) * (max(.y1.at) - min(.y1.at))) + min(.y1.at)
+      .y2.lb <- seq(0, 100, by=20)
+      .y2.at <- ((.y2.lb/100) * (max(.y1.at) - min(.y1.at))) + min(.y1.at)
+
+    } else {
+      #plotting first axis as is
+      .f.cex = 0.8
+      .x <- d2$SPECIES_NAME
+      .m.x <- max(nchar(.x), na.rm=T)/1.3*.f.cex
+      .y <- d2[,i]
+      .y1.at <- pretty(.y)
+      .y1.lb <- .y1.at
+      .yl <- "Source Loading"
+      .y2 <- d2[, paste("pc_", i, sep="")]
+      .y2 <- ((.y2/100) * (max(.y1.at) - min(.y1.at))) + min(.y1.at)
+      .y2.lb <- seq(0, 100, by=20)
+      .y2.at <- ((.y2.lb/100) * (max(.y1.at) - min(.y1.at))) + min(.y1.at)
+
+    }
+
+    #lims <- range(d2[,4], na.rm=TRUE)
+    #.min <- min(d2[,4][d2[,4]>0], na.rm=TRUE)
+
+    .old.par <- par(no.readonly = TRUE)
+    par(mar = c(.m.x, 4, 4, 4) + 0.3)
+    .bar <- barplot(.y,
+                    names.arg=.x,
+                    ylim = range(.y1.at),
+                    main=i,
+                    cex.lab=.f.cex,
+                    cex.names=.f.cex,
+                    axes=FALSE,
+                    ylab=.yl,
+                    las=2
+    )
+    points(.bar, .y2, col="red", pch=19)
+    axis(2, ylim=range(.y1.at), at=.y1.at, labels=.y1.lb, cex=.f.cex,
+         cex.axis=.f.cex, las=2)
+    axis(4, ylim=range(0, max(.y2.at)), at=.y2.at, labels=.y2.lb,
+         col="red", col.axis="red", cex=.f.cex,
+         cex.axis=.f.cex, las=2)
+    mtext("Percent of Total Contribution (%)", side=4, line=3, col ="red",
+          cex=.f.cex)
+    par(.old.par)
+
+
+  }
+  invisible(dat)
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+################
+################
+## unexported
+################
+################
+
+
+#think about
+#######################################
+# printing amount missing as a segment
+# adding plot arg control like in plot.respeciate
+# adding args to change the displacement of labels
+
+rsp_profile_pie <- function (x, labels = names(x), edges = 200, radius = 0.8,
+                             clockwise = FALSE,
+                             init.angle = if (clockwise) 90 else 0,
+                             density = NULL, angle = 45, col = NULL,
+                             border = NULL, lty = NULL, main = NULL, ...)
+{
+  #this is graphics::pie with a couple of modifications...
+  #many thanks to...
+  #R Core Team (2023). _R: A Language and Environment for Statistical Computing_. R Foundation
+  #for Statistical Computing, Vienna, Austria. <https://www.R-project.org/>.
+
+  if (!is.numeric(x) || any(is.na(x) | x < 0))
+    stop("'x' values must be positive.")
+  if (is.null(labels))
+    labels <- as.character(seq_along(x))
+  else labels <- as.graphicsAnnot(labels)
+
+  #added to remove any source with a zero contribution
+  if(any(x==0)){
+    labels <- labels[x!=0]
+    x <- x[x!=0]
+  }
+  my.tot <- sum(x)
+  if(my.tot < 99){
+    x <- c(x, 100-my.tot)
+    labels <- c(labels, "[hide]")
+    col <- c(col, NA)
+    init.angle <- init.angle + (((100-my.tot)/200)*360)
+  }
+
+  x <- c(0, cumsum(x)/sum(x))
+  dx <- diff(x)
+  nx <- length(dx)
+  plot.new()
+  pin <- par("pin")
+  xlim <- ylim <- c(-1, 1)
+  if (pin[1L] > pin[2L])
+    xlim <- (pin[1L]/pin[2L]) * xlim
+  else ylim <- (pin[2L]/pin[1L]) * ylim
+  dev.hold()
+  on.exit(dev.flush())
+  plot.window(xlim, ylim, "", asp = 1)
+  if (is.null(col))
+    col <- if (is.null(density))
+      c("white", "lightblue", "mistyrose", "lightcyan",
+        "lavender", "cornsilk")
+  else par("fg")
+  if (!is.null(col))
+    col <- rep_len(col, nx)
+  if (!is.null(border))
+    border <- rep_len(border, nx)
+  if (!is.null(lty))
+    lty <- rep_len(lty, nx)
+  angle <- rep(angle, nx)
+  if (!is.null(density))
+    density <- rep_len(density, nx)
+  twopi <- if (clockwise)
+    -2 * pi
+  else 2 * pi
+  t2xy <- function(t) {
+    t2p <- twopi * t + init.angle * pi/180
+    list(x = radius * cos(t2p), y = radius * sin(t2p))
+  }
+  for (i in 1L:nx) {
+
+    if(!as.character(labels[i]) == "[hide]"){
+      n <- max(2, floor(edges * dx[i]))
+      P <- t2xy(seq.int(x[i], x[i + 1], length.out = n))
+      #changed shape to include hole
+      polygon(c(P$x, rev(P$x*0.5)), c(P$y, rev(P$y*0.5)), density = density[i], angle = angle[i],
+              border = border[i], col = col[i], lty = lty[i])
+      P <- t2xy(mean(x[i + 0:1]))
+      lab <- as.character(labels[i])
+      if (!is.na(lab) && nzchar(lab)) {
+        # 1.1 and 1.2 are the extenders when moving labels way from
+        # the pie plot itself
+        lines(c(1, 1.1) * P$x, c(1, 1.1) * P$y)
+        text(1.2 * P$x, 1.2 * P$y, labels[i], xpd = TRUE,
+             adj = ifelse(P$x < 0, 1, 0), ...)
+      }
+    }
+  }
+
+  text(0,0, label=paste("sum\n",signif(my.tot, 3), "%", sep=""))
+  title(main = main, ...)
+  invisible(NULL)
+}
+
+
diff --git a/R/sp.rescale.R b/R/sp.rescale.R
new file mode 100644
index 0000000..f617c4e
--- /dev/null
+++ b/R/sp.rescale.R
@@ -0,0 +1,330 @@
+#' @name sp.rescale
+#' @title (re)SPECIATE profile rescaling functions
+#' @aliases sp_rescale sp_rescale_profile sp_rescale_species
+
+#' @description Functions for rescaling
+
+#' @description \code{sp_rescale} rescales the percentage weight records in
+#' a supplied (re)SPECIATE profile data set. This can be by profile or species
+#' subsets, and \code{sp_rescale_profile} and \code{sp_rescale_species} provide
+#' short-cuts to these options.
+#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+#' profiles.
+#' @param method numeric, the rescaling method to apply:
+#'   1 \code{x/total(x)};
+#'   2 \code{x/mean(x)};
+#'   3 \code{x-min(x)/max(x)-min(x)};
+#'   4 \code{x-mean(x)/sd(x)};
+#'   5 \code{x/max(x)}.
+#' The alternative 0 returns the records to their original
+#' values.
+#' @param by character, when rescaling \code{x} with
+#' \code{\link{sp_rescale}}, the data type to group and rescale,
+#' currently \code{'species'} (default) or \code{'profile'}.
+#' @return \code{sp_rescale} and \code{sp_rescale} return the
+#' \code{respeciate} profile with the percentage weight records rescaled using
+#' the requested method. See Note.
+#' @note Data sometimes needs to be normalised, e.g. when applying some
+#' statistical analyses. Rather than modify the EPA records in the
+#' \code{WEIGHT_PERCENT} column, \code{respeciate} creates a duplicate column
+#' \code{.value} which is modified by operations like \code{sp_rescale_profile}
+#' and \code{sp_rescale_species}. This means rescaling is always applied to
+#' the source information, rather than rescaling an already rescaled value,
+#' and the EPA records are retained unaffected. So, the original source
+#' information can be easily recovered.
+#' @references
+#'   Dowle M, Srinivasan A (2023). data.table: Extension of `data.frame`.
+#'   R package version 1.14.8, \url{https://CRAN.R-project.org/package=data.table}.
+
+#NOTE
+
+#' @rdname sp.rescale
+#' @export
+## #' @import data.table
+#   now done in xxx.r
+
+# may need to set data.table specifically??
+#      data.table::as.data.table, etc??
+
+
+######################
+#rescale by profile
+######################
+
+##########################
+#issues
+############################
+
+#think about...
+
+# maybe rethink options for sp_rescale() when user sets by but NOT method...
+
+# not actually sure anyone would want anything but method 1 for profiles...
+
+
+
+sp_rescale <- function(x, method = 2, by = "species"){
+
+  #################################
+  #check x is a respeciate object??
+
+  #check it has .value
+  x <- rsp_tidy_profile(x)
+
+  #save to return as is..
+  #    thinking about this
+  tmp <- class(x)
+
+  #was testing/thinking about
+  #################################
+  #backdoor
+  #################################
+  #could save species and profile identifiers, and
+  #weight_percent then overwrite weight_percent with .value here
+  #then copy back weight_percent after calculations
+  #    then you could do multiple rescales without touching
+  #    weight_percent???
+
+
+  #################################
+  #calculate stats
+  xx <- as.data.table(x)
+  #remove stats if there...
+  test <- c(".min",".max",".total", ".mean", ".na", ".n", ".sd")
+  test <- test[ test %in% colnames(xx)]
+  if(length(test)>0){
+    xx <- xx[, (test) := NULL]
+  }
+
+  #and recalculate
+
+  #stop if by option not known.
+  if(!by %in% c("species", "profile")){
+    stop("unknown 'by' option",
+         call.= FALSE)
+  }
+
+  if(by == "profile"){
+    out <- xx[,
+              .(#SPECIES_NAME = SPECIES_NAME[1],
+                #SPEC_MW = SPEC_MW[1],
+                .min = min(WEIGHT_PERCENT, na.rm = TRUE),
+                .max = max(WEIGHT_PERCENT, na.rm = TRUE),
+                .total = sum(WEIGHT_PERCENT, na.rm = TRUE),
+                .mean = mean(WEIGHT_PERCENT, na.rm = TRUE),
+                .na = length(WEIGHT_PERCENT[is.na(WEIGHT_PERCENT)]),
+                .n = length(WEIGHT_PERCENT[!is.na(WEIGHT_PERCENT)]),
+                .sd = sd(WEIGHT_PERCENT, na.rm = TRUE)
+              ),
+              by=.(PROFILE_CODE)]
+
+    out <- merge(xx, out, by="PROFILE_CODE", all.x=TRUE, all.y=FALSE,
+                 allow.cartesian=TRUE)
+  }
+  if(by == "species"){
+    out <- xx[,
+              .(.min = min(WEIGHT_PERCENT, na.rm = TRUE),
+                .max = max(WEIGHT_PERCENT, na.rm = TRUE),
+                .total = sum(WEIGHT_PERCENT, na.rm = TRUE),
+                .mean = mean(WEIGHT_PERCENT, na.rm = TRUE),
+                .na = length(WEIGHT_PERCENT[is.na(WEIGHT_PERCENT)]),
+                .n = length(WEIGHT_PERCENT[!is.na(WEIGHT_PERCENT)]),
+                .sd = sd(WEIGHT_PERCENT, na.rm = TRUE)
+              ),
+              by=.(SPECIES_ID)]
+
+    out <- merge(xx, out, by="SPECIES_ID", all.x=TRUE, all.y=FALSE,
+                 allow.cartesian=TRUE)
+  }
+
+
+  #################
+  #need to decide how to handle this
+  ###################
+  #this is a little messy
+
+  #rather leave weight-percent untouched
+  #   make a local .value column if not there
+  #       then have rescale reset that
+  #       using weight-percent as source
+
+  #not sure of a best plan for method arg
+  #     how to handling...?
+  #     how much user control...?
+
+  #other options
+  #     any more???
+
+  #might need an option to get all WEIGHT_PERCENT back from database...
+
+  if(!method %in% c(0:5)){
+    stop("unknown method")
+  }
+  if(method==0){
+    out$.value<- out$WEIGHT_PERCENT
+  }
+  if(method==1){
+    out$.value<- out$WEIGHT_PERCENT / out$.total
+  }
+  if(method==2){
+    out$.value<- out$WEIGHT_PERCENT /out$.mean
+  }
+  if(method==3){
+    #might be an issue if only one value
+    out$.value <- (out$WEIGHT_PERCENT - out$.min) / (out$.max - out$.min)
+  }
+  if(method==4){
+    #might be an issue if only one value
+    out$.value <- (out$WEIGHT_PERCENT - out$.mean) / out$.sd
+  }
+  if(method==5){
+    out$.value<- out$WEIGHT_PERCENT / out$.max
+  }
+
+  ############################
+  #backdoor end
+  ############################
+  #this would need careful thinking about
+  #     handling for method = 0 might be an issue
+  #     maybe a sp_reset() would be better
+  #         remove weight_percent and apply sp_pad(out, "weight")?
+
+  #output
+  #
+  out <- as.data.frame(out)
+  class(out) <- tmp
+  out
+}
+
+
+#' @rdname sp.rescale
+#' @export
+
+sp_rescale_profile <- function(x, method = 1, by ="profile"){
+  sp_rescale(x=x, method=method, by=by)
+}
+
+#' @rdname sp.rescale
+#' @export
+
+sp_rescale_species <- function(x, method = 2, by ="species"){
+  sp_rescale(x=x, method=method, by=by)
+}
+
+
+
+
+###################
+#unexported
+##################
+
+
+######################
+#old.rescale data by species
+######################
+
+##########################
+#issues
+############################
+
+# may need to set data.table specifically??
+#      data.table::as.data.table, etc??
+
+# may need to think about additional local scaling
+#      e.g. within in profile [species conc]/[sum of all species concs]
+
+rsp_rescale_species <- function(x, method = 2){
+
+  #################################
+  #check x is a respeciate object??
+
+  #check it has .value
+  x <- rsp_tidy_profile(x)
+
+  #save to return as is..
+  #    thinking about this
+  tmp <- class(x)
+
+  #################################
+  #calculate stats
+  xx <- as.data.table(x)
+  #remove stats if there...
+  test <- c(".min",".max",".total", ".mean", ".na", ".n", ".sd")
+  test <- test[ test %in% colnames(xx)]
+  if(length(test)>0){
+    xx <- xx[, (test) := NULL]
+  }
+  #and recalculate
+  out <- xx[,
+            .(#SPECIES_NAME = SPECIES_NAME[1],
+              #SPEC_MW = SPEC_MW[1],
+              .min = min(WEIGHT_PERCENT, na.rm = TRUE),
+              .max = max(WEIGHT_PERCENT, na.rm = TRUE),
+              .total = sum(WEIGHT_PERCENT, na.rm = TRUE),
+              .mean = mean(WEIGHT_PERCENT, na.rm = TRUE),
+              .na = length(WEIGHT_PERCENT[is.na(WEIGHT_PERCENT)]),
+              .n = length(WEIGHT_PERCENT[!is.na(WEIGHT_PERCENT)]),
+              .sd = sd(WEIGHT_PERCENT, na.rm = TRUE)
+            ),
+            by=.(SPECIES_ID)]
+
+  out <- merge(xx, out, by="SPECIES_ID", all.x=TRUE, all.y=FALSE,
+               allow.cartesian=TRUE)
+
+  #################
+  #need to decide how to handle this
+  ###################
+  #this is a little messy
+
+  #rather leave weight-percent untouched
+  #   make a local .value column if not there
+  #       then have rescale reset that
+  #       using weight-percent as source
+
+  #not sure of a best plan for method arg
+  #     how to handling...?
+  #     how much user control...?
+
+  #other options
+  #     no rescale
+  #     x/sum(x) so all out of 1?
+  #         that might want to only for unique profile codes???
+
+  if(!method %in% c(0:5)){
+    stop("unknown method")
+  }
+  if(method==0){
+    out$.value<- out$WEIGHT_PERCENT
+  }
+  if(method==1){
+    out$.value<- out$WEIGHT_PERCENT / out$.total
+  }
+  if(method==2){
+    out$.value<- out$WEIGHT_PERCENT /out$.mean
+  }
+  if(method==3){
+    #might be an issue if only one value
+    out$.value <- (out$WEIGHT_PERCENT - out$.min) / (out$.max - out$.min)
+  }
+  if(method==4){
+    #might be an issue if only one value
+    out$.value <- (out$WEIGHT_PERCENT - out$.mean) / out$.sd
+  }
+  if(method==5){
+    out$.value<- out$WEIGHT_PERCENT / out$.max
+  }
+
+  #output
+  #
+  out <- as.data.frame(out)
+  class(out) <- tmp
+  out
+}
+
+
+
+
+
+
+
+
diff --git a/R/sp.reshape.R b/R/sp.reshape.R
new file mode 100644
index 0000000..4c4d371
--- /dev/null
+++ b/R/sp.reshape.R
@@ -0,0 +1,369 @@
+#' @name sp.reshape
+#' @title (re)SPECIATE profile reshaping functions
+#' @aliases sp_dcast sp_dcast_profile sp_dcast_species sp_melt_wide
+
+#' @description Functions for reshaping (re)SPECIATE profiles
+
+#' @description \code{sp_dcast} and \code{sp_melt_wide} reshape supplied
+#' (re)SPECIATE profile(s). \code{sp_dcast} converts these from their supplied
+#' long form to a widened form, \code{dcast}ing the data set by either species
+#' or profiles depending on the \code{widen} setting applied.
+#' \code{sp_dcast_profile} and \code{sp_dcast_species} are wrappers for these
+#' options. \code{sp_melt_wide} attempts to return a previously widened data
+#' set to the original long form.
+#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+#' profiles in standard long form or widened form for
+#' \code{\link{sp_dcast}} and \code{\link{sp_melt_wide}}, respectively.
+#' @param widen character, when widening \code{x} with
+#' \code{\link{sp_dcast}}, the data type to \code{dcast},
+#' currently \code{'species'} (default) or \code{'profile'}. See Note.
+#' @param pad logical or character, when \code{melt}ing a previously widened
+#' data set, should output be re-populated with species and/or profile
+#' meta-data, discarded when widening. This is currently handled by
+#' \code{\link{sp_pad}}. The default \code{TRUE} applies standard settings,
+#' so does not include profile sources reference meta-data. (See
+#' \code{\link{sp_pad}} for other options).
+#' @param drop.nas logical, when \code{melt}ing a previously widened
+#' data set, should output be stripped of any rows containing empty
+#' weight/value columns. Because not all profile contains all species, the
+#' \code{dcast}/\code{melt} process can generate empty rows, and this step
+#' attempt account for that when working with standard re(SPECIATE)
+#' profiles. It is, however, sometimes useful to check first, e.g. when
+#' building profiles yourself.
+#' @return \code{sp_dcast} returns the wide form of the supplied
+#' \code{respeciate} profile. \code{sp_melt_wide}
+#' returns the (standard) long form of a previously widened profile.
+
+#' @note Conventional long-to-wide reshaping of data, or \code{dcast}ing, can
+#' be slow and memory inefficient. So, \code{respeciate} uses the
+#' \code{\link[data.table:dcast]{data.table::dcast}}
+#' method. The \code{sp_dcast_species} method,
+#' applied using \code{widen='species'}, is effectively:
+#'
+#' \code{dcast(..., PROFILE_CODE+PROFILE_NAME~SPECIES_NAME, value.var="WEIGHT_PERCENT")}
+#'
+#' And, the alternative \code{widen='profile'}:
+#'
+#' \code{dcast(..., SPECIES_ID+SPECIES_NAME~PROFILE_CODE, value.var="WEIGHT_PERCENT")}
+#'
+#' Although, \code{respeciate} uses a local version of \code{WEIGHT_PERCENT} called
+#' \code{.value}, so the EPA source information can easily be recovered. See also
+#' \code{\link{sp_rescale_profile}}.
+#'
+#' @references
+#'   Dowle M, Srinivasan A (2023). _data.table: Extension of `data.frame`_.
+#'   R package version 1.14.8, <https://CRAN.R-project.org/package=data.table>.
+
+#NOTE
+
+#' @rdname sp.reshape
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+# may need to set data.table specifically??
+#      data.table::as.data.table, etc??
+
+#in development sp_melt_wide below
+
+#maybe think about padding sp_dcast_profile
+
+######################
+#dcast
+#long_to_wide reshape
+######################
+
+sp_dcast <- function(x, widen = "species"){
+
+  ####################
+  #see ?data.table::dcast for examples
+  ####################
+
+  # currently running with any x
+  # but likely errors out with anything but a respeciate object...
+
+  #note: should this handle non-respeciate objects?
+  #      maybe not but with a force option to (at own risk) override??
+
+  #note: thinking about adding formal to set the wide term
+  #      so user can set the dcast term, e.g. ~ species or profile
+  #      name key? wide? variable.name?
+
+  #adds .value if missing
+  ## using .value rather the WEIGHT_PERCENT in case rescaled
+  x <- rsp_tidy_profile(x)
+
+  #save class
+  cls <- class(x)
+
+  xx <- as.data.table(x)
+
+  #stop if widen option not known.
+  if(!widen %in% c("species", "profile")){
+    stop("unknown widen option")
+  }
+  if(widen=="species"){
+    out <- dcast(xx,
+                 PROFILE_CODE + PROFILE_NAME ~ SPECIES_NAME,
+                 mean,
+                 na.rm=TRUE,
+                 value.var = ".value")
+  }
+  if(widen=="profile"){
+    out <- dcast(xx,
+                 SPECIES_ID + SPECIES_NAME ~PROFILE_CODE,
+                 mean,
+                 na.rm=TRUE,
+                 value.var = ".value")
+  }
+
+  #maybe use species_id in species dcast??
+  ########################################
+
+  #then add matched species_names, making this unique??
+  #   ~species_name makes labeling easier but will be
+  #       merging any duplicated species_names...
+  #   could also add back in other profile info but not species info...
+  #       which means we will loose that it you melt(dcast(...))
+
+  #output
+  #   just outputting as data.frame at moment
+  #       not sure if this can be respeciate and
+  #          don't want a respeciate wide class
+  #   could use an output arg??? as.is, data.frame, etc...
+
+  out <- as.data.frame(out)
+  #class(out) <- cls
+  out
+}
+
+#' @rdname sp.reshape
+#' @export
+
+sp_dcast_profile <- function(x, widen = "profile"){
+  sp_dcast(x=x, widen=widen)
+}
+
+
+
+#' @rdname sp.reshape
+#' @export
+
+sp_dcast_species <- function(x, widen = "species"){
+  sp_dcast(x=x, widen=widen)
+}
+
+
+
+
+#' @rdname sp.reshape
+#' @export
+
+##   now imports from xxx.r
+##   #' @import data.table
+
+# may need to set data.table specifically??
+#      data.table::as.data.table, etc??
+
+# wanted melt to go with dcast
+#    curently just to reverse the dcast action
+#    see e.g.
+#    https://cran.r-project.org/web/packages/data.table/vignettes/datatable-reshape.html
+
+#padding needs work
+#
+
+######################
+#melt
+#wide_to_long reshape
+######################
+
+sp_melt_wide <- function(x, pad = TRUE, drop.nas = TRUE){
+
+  ####################
+  #see ?data.table::melt for examples
+  ####################
+
+  #adds .value if missing
+  ## using .value rather the WEIGHT_PERCENT in case rescaled
+  x <- rsp_tidy_profile(x)
+
+  #save class
+  cls <- class(x)
+
+  xx <- as.data.table(x)
+
+  #################
+  #test log/wide
+  ####################
+  #should be able to simplify this a lot
+  .test <- c("PROFILE_NAME", "PROFILE_CODE", "SPECIES_ID", "SPECIES_NAME",
+             "WEIGHT_PERCENT", ".value")
+  .test <- .test[.test %in% names(xx)]
+  if(length(.test)>2){
+    stop("sp_melt_wide halted; x already looks like a long profile.", call.=FALSE)
+  }
+  .test.sp <- length(grep("PROFILE", .test))
+  .test.pr <- length(grep("SPECIES", .test))
+  if(.test.pr>0 & .test.sp>0){
+    stop("sp_melt_wide halted; x already looks suspect.", call.=FALSE)
+  }
+  .long <- "bad"
+  if(.test.pr>0 & length(.test)==.test.pr){
+    .id.vars <- .test
+    .long <- "PROFILE_CODE"
+  }
+  if(.test.sp>0 & length(.test)==.test.sp){
+    .id.vars <- .test
+    .long <- "SPECIES_NAME"
+  }
+  if(.long=="bad"){
+    stop("sp_melt_wide halted; x already looks suspect.", call.=FALSE)
+  }
+
+  #should only be species.wide or profile.wide
+  #   if we get to here
+
+  out <- melt(xx, id.vars = .id.vars)
+  names(out)[names(out)=="variable"] <- .long
+  names(out)[names(out)=="value"] <- ".value"
+
+  #out$WEIGHT_PERCENT <- out$.value
+
+  #merge if padding
+  #####################
+  #might not be best way of doing it
+  #testing sp_pad as an alternative to previous remarked code???
+  #     first need to standardise method, decide where to drop.nas,
+  #        finalise formals, decide best data.table methods, etc
+
+  if(is.logical(pad) && pad){
+    pad <- "standard"
+  }
+  if(is.character(pad)){
+
+    out <- sp_pad(out)
+
+#    PROFILES <- as.data.table(sysdata$PROFILES)
+#    SPECIES_PROPERTIES <- as.data.table(sysdata$SPECIES_PROPERTIES)
+#    if(.long=="PROFILE_CODE"){
+#      out <- merge(out, PROFILES, by = .long, all.y=FALSE,
+#                   all.x=TRUE, allow.cartesian=TRUE)
+#      .tmp <- intersect(names(out), names(SPECIES_PROPERTIES))
+#      out <- merge(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE,
+#                   all.x=TRUE, allow.cartesian=TRUE)
+#    } else {
+#      #.long must be "SPECIES_NAME"
+#      out <- merge(out, SPECIES_PROPERTIES, by = .long, all.y=FALSE,
+#                  all.x=TRUE, allow.cartesian=TRUE)
+#      .tmp <- intersect(names(out), names(PROFILES))
+#      out <- merge(out, PROFILES, by = .tmp, all.y=FALSE,
+#                   all.x=TRUE, allow.cartesian=TRUE)
+#    }
+#    #to get weight_percentage etc
+#    SPECIES <- as.data.table(sysdata$SPECIES)
+#    .tmp <- intersect(names(out), names(SPECIES))
+#    print(.tmp)
+#    out <- merge(out, SPECIES, by = .tmp, all.y=FALSE,
+#                 all.x=TRUE, allow.cartesian=TRUE)
+#  } else {
+#    #not great but...
+#    #if not padding WEIGHT_PERCENT has to be .value
+#    out$WEIGHT_PERCENT <- out$.value
+  }
+  #drop.nas...
+  if(drop.nas){
+    if(".value" %in% names(out)){
+      out <- out[!is.na(out$.value),]
+    } else {
+      if("WEIGHT_PERCENT" %in% names(out)){
+        out <- out[!is.na(out$WEIGHT_PERCENT),]
+      }
+        #do we want to warn if nothing to strip
+        #if so, in else here??
+    }
+  }
+  out <- as.data.frame(out)
+
+  #need to rationalise outputs!!!
+  rsp_build_respeciate(out)
+
+}
+
+
+########################
+#unexported, old code and text notes
+#######################
+
+#test data 219 records
+#aa <- sp_profile(sp_find_profile("ae6", by="profile_type"))
+
+#idiot-test reference.
+
+#I don't think I am using it anywhere...
+
+#########################
+#testing getting rid of this
+#########################
+
+#rsp_build_wide_profile <- function(x){
+#  .usp <- unique(x$SPECIES_NAME)
+#  .upr <- unique(x$PROFILE_CODE)
+#  ref <- data.frame(t(rep(NA, length(.usp))))
+#  names(ref) <- .usp
+#  ans <- lapply(.upr, function(.pr){
+#    temp <- x[x$PROFILE_CODE==.pr,]
+#    out <- data.frame(t(temp$WEIGHT_PERCENT))
+#    names(out) <- temp$SPECIES_NAME
+#    out<- modifyList(ref, out)
+#    out$PROFILE_CODE <- .pr
+#    out$PROFILE_NAME <- temp$PROFILE_NAME[1]
+#    out
+#    #ref
+#  })
+#  do.call(rbind, ans)
+#}
+
+#require(dplyr)
+
+#aa %>% group_by(PROFILE_CODE, SPECIES_ID) %>%
+#   summarise(PROFILE_NAME = PROFILE_NAME[1],
+#             SPECIES_NAME = SPECIES_NAME[1],
+#             SPEC_MW = SPEC_MW[1],
+#             total = sum(WEIGHT_PERCENT, na.rm=T),
+#             mean = mean(WEIGHT_PERCENT, na.rm=T),
+#             sd = sd(WEIGHT_PERCENT, na.rm=T),
+#             n = length(WEIGHT_PERCENT[!is.na(WEIGHT_PERCENT)]))
+
+#require(data.table)
+
+#test_wide <- function(x){
+  ##as above
+  #####################
+  ##see ?data.table::dcast for examples
+  #####################
+  #xx <- as.data.table(x)
+  #out <- dcast(xx,
+  #             PROFILE_CODE + PROFILE_NAME ~SPECIES_NAME,
+  #             mean,
+  #             na.rm=TRUE,
+  #             value.var = "WEIGHT_PERCENT")
+  ##maybe use species id in dcast
+  ##then add matched species names, making unique??
+  ##you can keep profile but not species info...
+  #as.data.frame(out)
+#}
+
+#plot(test_wide(aa)$Iron, rsp_build_wide_profile(aa)$Iron)
+## ~ 2 seconds
+
+#aa <- aa <- sp_profile(find_sp_profile("gas", by="profile_type"))
+## 2641 profiles
+## (full archive 6855)
+#plot(test_wide(aa)$Formaldehyde)
+## much faster than
+#plot(rsp_build_wide_profile(aa)$Formaldehyde)
+#plot(test_wide(aa)$Formaldehyde, rsp_build_wide_profile(aa)$Formaldehyde)
+##straight y=x
+#
diff --git a/R/spq.R b/R/spq.R
new file mode 100644
index 0000000..7be81c1
--- /dev/null
+++ b/R/spq.R
@@ -0,0 +1,89 @@
+#' @name spq
+#' @title spq_ quick access to common re(SPECIATE) sub-samples
+#' @aliases spq_gas spq_other spq_pm spq_pm.ae6 spq_pm.ae8 spq_pm.cr1
+#' spq_pm.simplified
+
+#' @description \code{spq_} functions are quick access wrappers to commonly
+#' requested re(SPECIATE) sub-samples.
+#' @return \code{spq_} functions typically return a \code{respeciate}
+#' \code{data.frame} of the requested profiles.
+#'
+#' For example:
+#'
+#' \code{sqr_gas} returns all gaseous profiles (\code{PROFILE_TYPE == 'GAS'}).
+#'
+#' \code{sqr_pm} returns all particulate matter (PM) profiles not classified
+#' as a special PM type (\code{PROFILE_TYPE == 'PM'}).
+#'
+#' The special PM types are subsets profiles intended for special
+#' applications, and these include \code{sqr_pm.ae6} (type \code{PM-AE6}),
+#' \code{sqr_pm.ae8} (type \code{PM-AE8}), \code{sqr_pm.cr1} (type
+#' \code{PM-CR1}), \code{sqr_pm.simplified} (type \code{PM-Simplified})
+#' and \code{sqr_other} (\code{PROFILE_TYPE == 'OTHER'}).
+#'
+
+
+#############################
+#NOTE
+############################
+
+#might not be keeping these
+
+#profile types
+#GAS, OTHER, PM, PM-AE6        PM-AE8        PM-CR1 PM-Simplified
+
+#spq_gas,
+
+
+#' @rdname spq
+#' @export
+
+spq_gas <- function(){
+  sp_profile(sp_profile_info("gas", by = "profile_type", partial=FALSE))
+}
+
+#' @rdname spq
+#' @export
+
+spq_other <- function(){
+  sp_profile(sp_profile_info("other", by = "profile_type", partial=FALSE))
+}
+
+#' @rdname spq
+#' @export
+
+spq_pm <- function(){
+  sp_profile(sp_profile_info("pm", by = "profile_type", partial=FALSE))
+}
+
+#' @rdname spq
+#' @export
+
+spq_pm.ae6 <- function(){
+  sp_profile(sp_profile_info("pm-ae6", by = "profile_type", partial=FALSE))
+}
+
+#' @rdname spq
+#' @export
+
+spq_pm.ae8 <- function(){
+  sp_profile(sp_profile_info("pm-ae8", by = "profile_type", partial=FALSE))
+}
+
+#' @rdname spq
+#' @export
+
+spq_pm.cr1 <- function(){
+  sp_profile(sp_profile_info("pm-cr1", by = "profile_type", partial=FALSE))
+}
+
+#' @rdname spq
+#' @export
+
+spq_pm.simplified <- function(){
+  sp_profile(sp_profile_info("pm-simplified", by = "profile_type", partial=FALSE))
+}
+
+
+
+
diff --git a/R/spx.R b/R/spx.R
new file mode 100644
index 0000000..7b9a76c
--- /dev/null
+++ b/R/spx.R
@@ -0,0 +1,262 @@
+#' @name spx
+#' @title spx_ functions for grouping and subsetting
+#' @aliases spx_ spx_copy spx_n_alkane spx_btex
+
+
+#' @description \code{spx_} functions generate a vector of assignment
+#' terms and can be used to subset or condition a supplied re(SPECIATE)
+#' \code{data.frame}.
+#'
+#' Most commonly, the \code{spx_} functions accept a single input, a
+#' re(SPECIATE) \code{data.frame} and return a logical vector of
+#' length \code{nrow(x)}, identifying species of interest as
+#' \code{TRUE}. So, for example, they can be used when
+#' \code{\link{subset}}ting in the form:
+#'
+#' \code{subset(x, spx_n_alkane(x))}
+#'
+#' ... to extract just n-alkane records from a \code{respeciate} object
+#' \code{x}.
+#'
+#' However, some accept additional arguments. For example, \code{spx_copy}
+#' also accepts a reference data set, \code{ref}, and a column identifier,
+#' \code{by}, and tests \code{x$by \%in\% unique(ref$by)}.
+#'
+#' @param x a \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+#' profiles.
+#' @param ref (\code{spx_copy} only) a second \code{respeciate} object, to
+#' be used as reference when testing \code{x}.
+#' @param by (\code{spx_copy} only) character, the name of the column
+#' in \code{ref} to copy when testing \code{x}.
+#' @return \code{spx_copy} outputs can be modified but, by default, it
+#' identifies all species in the supplied reference data set.
+#'
+#' \code{spx_n_alkane} identifies C1 to C40 n-alkanes.
+#'
+#' \code{spx_btex} identifies the BTEX group of aromatic hydrocarbons
+#' (benzene, toluene, ethyl benzene, and M-, O- and P-xylene).
+
+#############################
+#NOTE
+############################
+
+#still not sure this is best way of doing this
+#   but it did not seem to be slowing things down
+#   and other approaches seems likely to get messy
+#   really quick...
+
+# others to do????
+
+# the BTEXs  - doing/testing
+
+# others to consider???
+
+#     PAHs different groups
+#     VOCs HC network.
+#     elementals???
+#     monitoring network relevant subsets of species
+
+# special cases???
+
+#     spx_ref(x, ref, by="")
+#           where x is respeciate object, ref is a reference
+#           by is column in ref; case is x$by %in% unique(ref$by)
+
+#           could ref also be a vector of terms???
+
+
+#' @rdname spx
+#' @export
+
+spx_copy <- function(x, ref = NULL, by="species_id"){
+
+  #maybe warn???
+  if(is.null(ref)){
+    ref <- x
+  }
+  names(x) <- tolower(names(x))
+  names(ref) <- tolower(names(ref))
+  .tmp <- unique(ref[, by])
+  x[, by] %in% .tmp
+
+}
+
+#####################
+#spx_n_alkanes
+#######################
+
+
+#source names
+# from https://en.wikipedia.org/wiki/List_of_straight-chain_alkanes
+# (might be duplicates)
+# (some not using standard names)
+
+# could try smiles, molecular formula, cas numbers???
+# should be one entry/species if they are unique???
+
+#test
+## a <- sysdata$SPECIES_PROPERTIES
+## b <- subset(a, spx_n_alkane(a))
+## b[order(b$SPEC_MW),]
+
+#' @rdname spx
+#' @export
+
+spx_n_alkane <- function(x){
+  #group x by is/isn't n-alkane
+  tolower(x$SPECIES_NAME) %in% c("methane",            #C1
+                                 "ethane",
+                                 "propane",
+                                 "n-butane",
+                                 "n-pentane",          #C5
+                                 "n-hexane",
+                                 "n-heptane",
+                                 "n-octane",
+                                 "n-nonane",
+                                 "n-decane",           #C10
+                                 "n-undecane",
+                                 "n-dodecane",
+                                 "n-tridecane",
+                                 "n-tetradecane", "tetradecane",
+                                 "n-pentadecane", "pentadecane",
+                                 "n-hexadecane", "hexadecane",
+                                 "n-heptadecane", "heptadecane",
+                                 "n-octadecane", "octadecane",
+                                 "n-nonadecane", "nonadecane",
+                                 "n-icosane", "eicosane",         #C20
+                                 "n-heneicosane",
+                                 "n-docosane",
+                                 "n-tricosane",
+                                 "n-tetracosane",
+                                 "n-pentacosane",      #C25
+                                 "n-hexacosane",
+                                 "n-heptacosane",
+                                 "n-octacosane",
+                                 "n-nonacosane",
+                                 "n-triacontane",      #C30
+                                 "n-hentriacontane",
+                                 "n-dotriacontane", "dotriacontane",
+                                 "n-tritriacontane",
+                                 "n-tetratriacontane",
+                                 "n-pentatriacontane", #C35
+                                 "n-hexatriacontane",
+                                 "n-heptatriacontane",
+                                 "n-octatriacontane",
+                                 "n-nonatriacontane",
+                                 "n-tetracontane"      #C40
+  )
+}
+
+
+
+
+
+#####################
+#spx_n_btex
+#######################
+
+#  Benzene, toluene, ethylbenzene, 3 xylenes isomers
+
+#ref
+#https://www.eea.europa.eu/help/glossary/eper-chemicals-glossary/benzene-toluene-ethylbenzene-xylenes-as-btex
+#   like better...
+
+# also have btx group which is same minus ethylbenzene...
+
+#notes
+#####################
+
+#might need more thinking about
+
+#    toluene is sometimes reported as a mixture of toulene and another compound
+#    maybe not separated by GC???
+
+#    ethyl benzene and xylenes might have several names
+#    subset(a, a$Molecular.Formula=="C8H10")
+
+#    are xylenes in as 1,2-, 1,3- and 1,4-dimethyl benzene???
+#    are spaces an issue???
+
+#    if several names for btex, might need to think about how to
+#    sample (CAS? etc), merge and compare???
+
+
+
+#tests
+#########################
+## a <- sysdata$SPECIES_PROPERTIES
+## b <- subset(a, spx_btex(a))
+## b[order(b$SPEC_MW),]
+
+## to do
+
+#' @rdname spx
+#' @export
+
+spx_btex <- function(x){
+  #group x by is/isn't btex
+  tolower(x$SPECIES_NAME) %in% c(
+    #to test/check...
+    "benzene",
+    "toluene",
+    "ethylbenzene",
+    "m-xylene",
+    "o-xylene",
+    "p-xylene"
+  )
+}
+
+
+
+
+
+
+##############################
+#unexported, old, test notes
+##############################
+
+# other options seemed a lot of work...
+
+#spx_n_alkane <- function(x){
+#  x <- spg_n_alkane(x)
+#  x <- x[x$spg_n_alkane,]
+#  x[names(x) != "spg_n_alkane"]
+#}
+
+
+#subset. <- function(x, subset, select, ...){
+#  mf <- match.call(expand.dots = FALSE)
+#  m <- match(c("x", "subset", "select"), names(mf), 0L)
+#  mf <- mf[c(1L, m)]
+#  mf$drop.unused.levels <- TRUE
+#  .x <- as.character(deparse(mf$x))
+#  .subset <- as.character(deparse(mf$subset))
+#  .select <- as.character(deparse(mf$select))
+#  .subs <- strsplit(.subset, "&")[[1]]
+#  for(i in 1:length(.subs)){
+#    .i <- gsub(" ", "", .subs[i])
+#    if(is.function(try(get(.i), silent=TRUE))){
+#      .subs[i] <- paste(.i, "(", .x, ")", sep="")
+#    }
+#  }
+#  .subs <- paste(.subs, collapse = " & ", sep="")
+#
+#
+#print(.subs)
+#print(.select)
+#  .eq <- paste("subset(as.data.frame(", .x,
+#               "), subset = ", .subs, ")",
+#               sep="")
+#  print(.eq)
+#  #mf <- eval(mf, parent.frame())
+#  rsp_build_respeciate(
+#  eval(parse(text=.eq),
+#       parent.frame()))
+#
+##    cl <- sys.call()
+##    f <- get(as.character(cl[[1]]), mode="function", sys.frame(-1))
+##    cl <- match.call(definition=f, call=cl)
+##    x<- as.list(cl)[-1]
+##    deparse(x$subset)
+#
+#}
diff --git a/R/sysdata.R b/R/sysdata.R
index bd7979b..d95abd8 100644
--- a/R/sysdata.R
+++ b/R/sysdata.R
@@ -3,20 +3,37 @@
 ############################################
 #'
 #' @name sysdata
-#' @description splat_packed data [to be described]
+#' @description the re(SPECIATE) sysdata, a local versions of SPECIATE,
+#' the EPA's repository of organic gas and particulate matter (PM) speciation
+#' profiles of air pollution sources.
 #'
-#' @format A ( 5  long) 'list' object
+#' @format A (long) list, containing:
 #' \describe{
-#'   \item{PROFILES}{[to document]}
-#'   \item{SPECIES}{[to document]}
-#'   \item{SPECIES_PROPERTIES}{[to document]}
-#'   \item{PROFILE_REFERENCE}{[to document]}
-#'   \item{REFERENCES}{[to document]}
+#'   \item{PROFILES}{The main \code{data.frame} of profile-specific meta-data,
+#'   with one row per profile, key term \code{PROFILE_CODE}.}
+#'   \item{SPECIES}{The main \code{data.frame} of individual record meta-data,
+#'   with one row per species in each profile, key terms \code{PROFILE_CODE}
+#'   and \code{SPECIES_ID} linking \code{PROFILES} and
+#'   \code{SPECIES_PROPERTIES}.}
+#'   \item{SPECIES_PROPERTIES}{The main \code{data.frame} of species-specific
+#'   meta-data, with one row per species, key term \code{SPECIES_ID}.}
+#'   \item{PROFILE_REFERENCE}{The \code{data.frame} linking profile and
+#'   reference meta-data, one row per references per profile, key terms
+#'   \code{PROFILE_CODE} and \code{REF_Code}.}
+#'   \item{REFERENCES}{The main \code{data.frame} of references for profile
+#'   source meta-data, one row per reference, key term \code{REF_Code}.}
+#'   \item{And others}{Currently not documented.}
 #' }
 
-globalVariables(c("sysdata", "PROFILE_CODE")) #don't like this...
+##################################
+##################################
+## IMPORTANT
+##################################
+##################################
 
-#' @source [to doc]
+# this documentation needs doing
+
+#' @source https://www.epa.gov/air-emissions-modeling/speciate
 #' @references
 #' Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D.,
 #' Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010.
diff --git a/R/xxx.R b/R/xxx.R
new file mode 100644
index 0000000..0de879a
--- /dev/null
+++ b/R/xxx.R
@@ -0,0 +1,629 @@
+##############################
+#setup code, misc code,
+#testing code, etc
+##############################
+
+#currently no hooks, etc...
+
+#####################
+#to check
+#####################
+
+# I think all build/check issues associate with
+#     xxx_test and its depends...
+#        (not keeping unless we can get it to work better)
+
+
+utils::globalVariables(c("sysdata",
+                         "PROFILE_CODE", "PROFILE_NAME", "PROFILE_TYPE",
+                         "SPECIES_ID", "SPECIES_NAME",
+                         "SPEC_MW", "WEIGHT_PERCENT", ".", ".value"))
+
+########################
+#to think about...
+#######################
+
+# all @import here
+#    in case we have to move to data.table::as.data.table, etc...
+
+#' @import data.table
+
+#   data.table used by:
+#         rsp_test_profile,
+#         sp_dcast_profile, and those that use dcast?
+#               sp_species_cor
+#               sp_profile_distance
+#         and others???
+#               need to identify them
+
+#' @importFrom stats sd cophenetic cor cutree dist hclust heatmap AIC
+#' as.formula coefficients formula lm nls nls.control predict
+#' @importFrom utils modifyList head
+#' @importFrom graphics axis barplot par legend lines rect text abline
+#' grid mtext plot.new plot.window points polygon title
+#' @importFrom grDevices cm.colors colorRampPalette as.graphicsAnnot
+#' dev.flush dev.hold heat.colors
+
+
+
+#might be able to drop legend?
+#   check plot.respeciate
+
+
+##############################
+#common unexported
+##############################
+
+
+############################
+#unexported function to
+#test is x respeciate
+
+##aa <- sp_profile(sp_find_profile("ae8", by="profile_type"))
+##bb <- sp_find_profile("ethanol")
+##bb <- sp_find_species("butane")
+
+##rsp_test_respeciate(aa, level=1)
+## level 1 TRUE for respeciate only (needs to look like respeciate)
+## level likewise but TRUE for respeciate, respeciate.ref and respeciate.spcs
+
+##might need to rethink this if I want to use it as is.respeciate method
+##  is only allows 1 argument.
+
+## could also test for .value
+
+rsp_test_respeciate <- function(x, level = 1,
+                                silent = FALSE){
+  test <- class(x)
+  out <- "bad"
+  test <- test[1]=="respeciate"
+  #order below matters
+  #maybe tidy??
+  if(all(c("SPECIES_NAME", "SPECIES_ID") %in% names(x))){
+    out <- "respeciate.species.ref"
+  }
+  if(all(c("PROFILE_NAME", "PROFILE_CODE") %in% names(x))){
+    out <- "respeciate.profile.ref"
+  }
+  if(all(c("SPECIES_NAME", "SPECIES_ID", "PROFILE_NAME", "PROFILE_CODE",
+           "WEIGHT_PERCENT") %in% names(x))){
+    out <- "respeciate"
+  }
+  if(!silent){
+    if(test && out=="bad"){
+      warning("suspect respeciate object!")
+    }
+    if(nrow(x)==0){
+      warning("empty respeciate object!")
+    }
+  }
+  if(level==1){
+    return(test)
+  }
+  return(out)
+}
+
+#######################
+#tidy profile
+
+#I am thinking of using a .value column as my value column
+#then WEIGHT_PERCENT remains as EPA made it even if we rescale...
+
+## testing this idea at the moment
+##     make .value using rsp_tidy_profile
+##     enabled in plot.respeciate, sp_profile_rescale, sp_profile_dcast
+##                rsp_test_profile
+
+rsp_tidy_profile <- function(x){
+  #.value is local version of weight
+  if(!".value" %in% names(x)){
+    x$.value <- x$WEIGHT_PERCENT
+  }
+  x
+}
+
+###########################
+#tidy names
+
+#currently not exported
+#quick code to tidy species names
+
+#currently used in plot.respeciate
+
+#note: not fully tested
+
+
+rsp_tidy_species_name <- function(x){
+
+  #attempts shorten names by remove other versions
+  #names seem to be in format a (or b || c)
+  #where (guessing) a is main name and
+  #         b and c are alternatives.
+
+  #not fully tested,
+  #   might still be more cases this dies on
+
+  #gsub("[(].*","", x) failed if name a includes brackets
+  #example:#"(2-methylpropyl)benzene (or isobutylbenzene)"
+
+  #sub("[(][^(]or+$", "", x) failed if b or c includes brackets
+  #also left space at end so needed sub("^\\s+|\\s+$", "", x)
+
+  #sometimes it is "( or "
+  x <- gsub(" [(] or ", " (or ", x)
+  #next removes from last "(or" onwards
+  x <- gsub("[(]or .*","", x)
+  sub("^\\s+|\\s+$", "", x)
+}
+
+
+################################
+#rsp_test_profile
+
+#file:///C:/Users/trakradmin/Downloads/datatable.pdf
+
+
+##rsp_test_profile(aa)
+
+rsp_test_profile <- function(x){
+
+  #set up .value if not there
+  x <- rsp_tidy_profile(x)
+
+  #######################################
+  #track and return original class?
+  #      testing
+  #######################################
+  tmp <- class(x)
+  xx <- as.data.table(x)
+  out <- xx[,
+            .(PROFILE_NAME = PROFILE_NAME[1],
+              SPECIES_NAME = SPECIES_NAME[1],
+              SPEC_MW = SPEC_MW[1],
+              .total = sum(.value, na.rm = TRUE),
+              .value = mean(.value, na.rm = TRUE),
+              .n = length(.value[!is.na(.value)]),
+              .sd = sd(.value, na.rm = TRUE)
+            ),
+            by=.(PROFILE_CODE, SPECIES_ID)]
+
+  #might regret .value
+  out$WEIGHT_PERCENT <- out$.value
+  #note: I *think* this is fine as long as x is never permanently replaced
+  #      with the rsp_test_profile without a warning
+  #           might be cases where we want to change WEIGHT_PERCENT
+
+  #           change case making an average profile
+
+  #output
+  #might regret this...
+  #   but would like to leave user free to use e.g. dplyr rather than
+  #       data.table if that is their preference
+  out <- as.data.frame(out)
+  class(out) <- tmp
+  out
+}
+
+# now replaced with rsp_test_profile
+
+#rsp_test (old version)
+
+#base case idiot test
+
+#rsp_test <- function(x){
+#  .prf <- unique(x$PROFILE_CODE)
+#  ans <- lapply(.prf, function(y){
+#    temp <- subset(x, PROFILE_CODE==y)
+#    .spc <- unique(temp$SPECIES_ID)
+#    ans <- lapply(.spc, function(z){
+#      temp2 <- subset(temp, SPECIES_ID==z)
+#      data.frame(PROFILE_CODE = y,
+#                 PROFILE_NAME = temp2$PROFILE_NAME[1],
+#                 SPECIES_ID = z,
+#                 SPECIES_NAME = temp2$SPECIES_NAME[1],
+#                 COUNT = length(temp2$WEIGHT_PERCENT[!is.na(temp2$WEIGHT_PERCENT)]),
+#                 TOTAL = sum(temp2$WEIGHT_PERCENT[!is.na(temp2$WEIGHT_PERCENT)]),
+#                 SPEC_MW = temp2$SPEC_MW[1],
+#                 WEIGHT_PERCENT=mean(temp2$WEIGHT_PERCENT, na.rm=TRUE))
+#    })
+#    do.call(rbind, ans)
+#  })
+#  do.call(rbind, ans)
+#}
+
+#require(dplyr)
+#test1 <- function(x){
+#  x %>%
+#    group_by(PROFILE_CODE, SPECIES_ID) %>%
+#    summarise(PROFILE_NAME = PROFILE_NAME[1],
+#              SPECIES_NAME = SPECIES_NAME[1],
+#              SPEC_MW = SPEC_MW[1],
+#              total = sum(WEIGHT_PERCENT, na.rm=T),
+#              mean = mean(WEIGHT_PERCENT, na.rm=T),
+#             sd = sd(WEIGHT_PERCENT, na.rm=T),
+#             n = length(WEIGHT_PERCENT[!is.na(WEIGHT_PERCENT)]))
+#}
+#aa <- sp_profile(sp_find_profile("ae6", by="profile_type")$PROFILE_CODE)
+#require(data.table)
+#test2 <- function(x){
+#  #######################################
+#  #could track and return original class?
+#  #######################################
+#  xx <- as.data.table(x)
+#  out <- xx[,
+#            .(PROFILE_NAME = PROFILE_NAME[1],
+#              SPECIES_NAME = SPECIES_NAME[1],
+#              SPEC_MW = SPEC_MW[1],
+#              .total = sum(WEIGHT_PERCENT, na.rm = TRUE),
+#              WEIGHT_PERCENT = mean(WEIGHT_PERCENT, na.rm = TRUE),
+#              .n = length(WEIGHT_PERCENT[!is.na(WEIGHT_PERCENT)]),
+#              .sd = sd(WEIGHT_PERCENT, na.rm = TRUE)
+#            ),
+#            by=.(PROFILE_CODE, SPECIES_ID)]
+#  #output
+#  #currently data.frame
+#  #could be respeciate
+#  as.data.frame(out)
+#}
+
+
+
+
+
+
+
+
+
+#####################
+#testing
+#####################
+
+#playing
+
+#function(x, subset,...){
+#  ans <- match.call()
+#  ans <- as.character(ans)
+#  return(ans)
+#}
+
+#ggplot example
+#require(ggplot2)
+#ggplot() + geom_col(aes(y=SPECIES_NAME, x=WEIGHT_PERCENT), data=aa) + facet_grid(.~PROFILE_NAME)
+
+
+############################
+#color key
+############################
+
+########################
+#using this in:
+########################
+
+#sp_species_cor
+
+
+#started with:
+#https://stackoverflow.com/questions/9314658/colorbar-from-custom-colorramppalette
+
+#removed dev.new
+
+#see also regarding adding to existing plot
+#https://www.statmethods.net/advgraphs/layout.html
+
+# One figure in row 1 and two figures in row 2
+# row 1 is 1/3 the height of row 2
+# column 2 is 1/4 the width of the column 1
+##attach(mtcars)
+##layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE),
+##       widths=c(3,1), heights=c(1,2))
+##hist(wt)
+##hist(mpg)
+##hist(disp)
+
+# Add boxplots to a scatterplot
+##par(fig=c(0,0.8,0,0.8), new=TRUE)
+##plot(mtcars$wt, mtcars$mpg, xlab="Car Weight",
+##     ylab="Miles Per Gallon")
+##par(fig=c(0,0.8,0.55,1), new=TRUE)
+##boxplot(mtcars$wt, horizontal=TRUE, axes=FALSE)
+##par(fig=c(0.65,1,0,0.8),new=TRUE)
+##boxplot(mtcars$mpg, axes=FALSE)
+##mtext("Enhanced Scatterplot", side=3, outer=TRUE, line=-3)
+
+# Function to plot color bar
+
+#currently not exporting
+
+#notes
+#change min and max or .min and .max
+#move title into plot?
+#add key.style?
+#add key.position?
+#how to set plot range when na is included verus excluded
+#    needs to be a function of min/max range not 0.5, 1.5, etc
+#    maybe 0.25 ish??? maybe tick or half tick range????
+
+#key is the data to be color-scaled or its range
+#     currently just used for range
+
+#cols is col.palette to apply to key range
+#   the col ramp is messy... seems to need border
+#        that could be issue if transparency using...
+
+#x, y are key (proportional, 0 to 1) positions
+
+#na.col color of na's
+#na.cex scaling for na box width
+#    do we need any other NA controls??
+#          e.g. na.border if needed to be different to main border???
+
+#axes to do
+#    not using at moment
+#        but would separate axes and border colours
+#             not sure it is wanted/needed...
+
+#ticks, nticks axes tick positions and number of ticks, respectively
+#     think about these
+
+#title still work in progress
+#     not nice version on type=1
+
+
+#have type = 1/2 for horizontal/vertical
+#   think about types 3 and 4
+#        horizontal with text at top
+#        vertical with text on other side
+
+#think about exporting this once sorted???
+#   or passing to another package/better home
+#       (not sure how that fits with package remit)
+
+
+
+rsp_col_key <- function(key, cols, x, y = NULL,
+                        ticks, nticks,
+                        na.col = "grey", na.cex = 0.25,
+                        title = "", axes, bg, border,
+                        type = 2,
+                        ...){
+
+    #setup
+    op <- par(no.readonly = TRUE)
+
+    if(missing(x)){
+      #currently just doing this option
+      #like key.pos "top-left", key.style = 1 (horizontal, annotation below)
+      x <- 0.1
+    }
+    if(is.null(y)){
+      y <- 0.9
+    }
+    .min <- min(key, na.rm=TRUE)
+    .max <- max(key, na.rm=TRUE)
+    if(missing(ticks)){
+      ticks <- pretty(c(.min, .max), 3)
+    }
+    if(missing(nticks)){
+      nticks <- length(ticks)
+    }
+    .na.width <- na.cex * (.max-.min)
+    if(missing(bg)){
+      bg <- "white"
+    }
+    if(missing(border)){
+      border <- "black"
+    }
+    scale <- (length(cols)-1)/(.max-.min)
+
+#print(.max-.min)
+#print(.na.width)
+    #print(.min)
+    #print(.max)
+
+    #key.style 1
+    if(type==1){
+      #horizontal, header before, annotation after
+      #margins
+      .mai <- c(0.1,0.1,0.1,0.1)
+      if(title ==""){
+        #no title
+        .fig <- c(x-0.1, x+0.1, y-0.04, y+0.1)
+        .wdt <- 12
+      } else {
+        #title
+        .fig <- c(x-0.1, x+0.1, y-0.12, y+0.1)
+        .wdt <- 15
+      }
+
+      if(is.na(na.col)){
+        .brd <- c(.na.width, .na.width)
+      } else {
+        .brd <- c(.na.width, .na.width*2)
+      }
+
+      #position col key
+      par(fig = .fig, mai = .mai, new=TRUE)
+
+      #plot col key
+
+      #region
+      plot(c(.min -.brd[2], .max+(.brd[1]*0.5)), c(-1, .wdt),
+           type='n', bty='n', xaxt='n', xlab='',
+           yaxt='n', ylab='', main="", font.main = 1)
+      #bg + border
+      rect(.min -.brd[2], -1, .max+(.brd[1]*0.5),  .wdt,
+           col=bg, border=border)
+      #title
+      if(title !=""){
+        text(.min+((.max-.min)/2)+(.na.width*0.75), 13, labels=title, col="black", cex=0.75)
+      }
+      #col scale
+      for (i in 1:(length(cols)-0)) {
+        x <- (i-1)/scale + .min
+        rect(x,5,x+1/scale,10,col=cols[i], border=NA)
+      }
+      #axes
+      lines(c(.min, .max), c(5,5), col="black")
+      for (i in ticks) {
+        lines(c(i,i), c(5,4),col="black")
+      }
+      #axes annotation
+      text(ticks, rep(2, length(ticks)), labels=ticks,
+           cex=0.75, adj=0.5)
+      #na block
+      if(!is.na(na.col)){
+        rect(.min-(.na.width*0.5), 5,.min-(.na.width*1.5), 10,  col=na.col, border="black")
+        text(.min-.na.width, 2, labels="NA", col="black", cex=0.75)
+      }
+
+    }
+
+    if(type==2){
+      #horizontal, header before, annotation after
+      #margins
+      .mai <- c(0.1,0.1,0.1,0.1)
+      if(title ==""){
+        #no title
+        .fig <- c(x-0.05, x+0.05, y-0.2, y+0.1)
+        .wdt <- 12
+      } else {
+        #title
+        .fig <- c(x-0.05, x+0.05, y-0.2, y+0.1)
+        .wdt <- 15
+      }
+
+      if(is.na(na.col)){
+        .brd <- c(.na.width, .na.width)
+      } else {
+        .brd <- c(.na.width, .na.width*2)
+      }
+
+      #position col key
+      par(fig = .fig, mai = .mai, new=TRUE)
+
+      #plot col key
+
+      #region
+      plot(c(-1, .wdt), c(.min-.brd[2], .max+(.brd[1]*0.5)),
+           type='n', bty='n', xaxt='n', xlab='',
+           yaxt='n', ylab='', main="", font.main = 1)
+      #bg + border
+      rect(-1, .min-.brd[2], .wdt, .max+(.brd[1]*0.5),
+           col=bg, border=border)
+      #title
+      if(title !=""){
+        text(.min+((.max-.min)/2)+(.na.width*0.75), 13, labels=title,
+             col="black", cex=0.75)
+      }
+
+      #for (i in 1:(length(lut)-1)) {
+      #  y = (i-1)/scale + min
+      #  rect(0,y,10,y+1/scale, col=lut[i], border=NA)
+      #}
+
+      #col scale
+      ####################
+      #note
+      ####################
+      #this needs work because rect needs colored border
+      #which kills transparent ranges...
+      #does that matter
+      for (i in 1:(length(cols))) {
+        y <- (i-1)/scale + .min
+        rect(5,y-(1/scale),10,y,col=cols[i], border=cols[i])
+      }
+      #axes
+      lines(c(5,5), c(.min, .max), col="black")
+      for (i in ticks) {
+        lines(c(5,4), c(i,i), col="black")
+      }
+      #axes annotation
+      text(rep(2, length(ticks)), ticks, labels=ticks,
+           cex=0.75, adj=0.5)
+      #na block
+      if(!is.na(na.col)){
+        rect(5, .min-(.na.width*0.5), 10, .min-(.na.width*1.5), col=na.col, border="black")
+        text(2, .min-.na.width, labels="NA", col="black", cex=0.75)
+      }
+
+    }
+
+  par(op)
+}
+
+#plot(iris$Sepal.Length, iris$Sepal.Width)
+#rsp_col_key(c(1,-1), colorRampPalette(c("light green", "yellow", "orange", "red"))(100), title="testing")
+
+
+
+
+
+
+
+
+###########################
+#diagnostic
+##########################
+
+#######################
+##rethink this!!!!
+#######################
+
+#parking for now
+
+
+#https://www.rpubs.com/NYang/ANLY530-IRIS
+
+#devtools::unload(); devtools::load_all()
+#a <- xxx_test(); subset(a, a$.ref)$PROFILE_NAME
+
+#xxx_test <- function(){
+#
+#  .tmp <- sysdata$PROFILES
+#  .out <-as.data.table(sp_dcast_profile(sp_profile(.tmp$PROFILE_CODE)))
+#  .tmp <- .tmp[c("PROFILE_CODE", "Keywords")]
+#  .tmp$.ref <- grepl("wildfire", tolower(.tmp$Keywords)) |
+#    grepl("burning", tolower(.tmp$Keywords))
+#  .out <- merge(.out, .tmp)
+#  .out <- .out[-1599,]
+#  #glfit<-glm(as.numeric(.out$.ref)~.out$`Organic carbon` + .out$Nitrate +
+#  #             .out$Sulfate, family = 'binomial')
+#  glfit<-glm(as.numeric(.out$.ref)~.out$Calcium + .out$Lead +
+#                            .out$Zinc +.out$Manganese, family = 'binomial')
+#  print(summary(glfit))
+#  .out$.pred <- NA
+#  .out$.pred[as.numeric(names(predict(glfit)))] <- predict(glfit, type="response")
+#  .out
+#  #glfit
+#}
+
+
+
+######################################
+#matching
+######################################
+
+#Camp Fire
+
+#pb (lead) 0.04 - 0.13 ug/m3
+#zinc      0.15 - 0.45
+#manganese 0.008 - 0.012
+#calcium   0.1-0.2
+#organic carbon 20-100
+
+#
+
+
+
+########################################
+#pca
+########################################
+
+#see notes at
+#https://www.ime.usp.br/~pavan/pdf/PCA-R-2013
+#on r pca methods
+
+#https://stats.stackexchange.com/questions/59213/how-to-compute-varimax-rotated-principal-components-in-r
+#https://stats.stackexchange.com/questions/56089/using-varimax-rotated-pca-components-as-predictors-in-linear-regression
+#https://stackoverflow.com/questions/34944179/doing-pca-with-varimax-rotation-in-r
+#on varimax rotation
diff --git a/README.Rmd b/README.Rmd
index 1ac5a4c..43aeb7a 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -51,7 +51,7 @@ Plotting a profile
 plot(p)
 ```
 
-... or multiple profiles using barplot syntax
+... using barplot syntax
 
 ```{r fig.width=12, fig.height=5, fig.align="center", dpi=400}
 #using base barplot 
diff --git a/README.md b/README.md
index 749a4a0..974d2a3 100644
--- a/README.md
+++ b/README.md
@@ -22,14 +22,8 @@ library(respeciate)
 x <- sp_find_profile("Ethanol")
 x
 #> respeciate profile reference
-#> 0291 1070 1071 1132 1149 1301 1302 1303 1304 1314 5473 5474 5475 5477 5478 5479 
-#> 5481 5482 5483 5485 5486 5487 5489 5490 5491 5493 5494 5495 5496 5497 5498 5499 
-#> 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 
-#> 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 
-#> 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 
-#> 8200 8210 8215 8733 8736 8751 8751a 8752 8754 8755 8757 8758 8760 8761 8763 
-#> 8764 8765 8766 8767 8768 ...
-#>    > 160 profiles [showing first 100]
+#> 0291 1070 1071 1132 1149 1301 1302 1303 1304 1314 ...
+#>    > 160 profiles [showing first 10]
 ```
 
 ## speciate
@@ -49,7 +43,7 @@ plot(p)
 
 <img src="man/figures/unnamed-chunk-5-1.png" style="display: block; margin: auto;" />
 
-… or multiple profiles using barplot syntax
+… using barplot syntax
 
 ``` r
 #using base barplot 
diff --git a/docs/news/index.html b/docs/news/index.html
index adccae4..c172a4d 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -17,7 +17,7 @@
       </button>
       <span class="navbar-brand">
         <a class="navbar-link" href="../index.html">respeciate</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.2.1</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.2.2</span>
       </span>
     </div>
 
@@ -47,6 +47,37 @@ <h1 data-toc-skip>Changelog <small></small></h1>
       <small>Source: <a href="https://github.com/atmoschem/respeciate/blob/HEAD/NEWS.md" class="external-link"><code>NEWS.md</code></a></small>
     </div>
 
+    <div class="section level2">
+<h2 class="page-header" data-toc-text="0.0" id="version-00---release-notes">Version 0.0 - Release Notes<a class="anchor" aria-label="anchor" href="#version-00---release-notes"></a></h2>
+<ul><li>[0.2.2]
+<ul><li>released 2023-06-28</li>
+<li>now imports data.table; moving to data.table methods</li>
+<li>changed news format because build_news missing previous…</li>
+</ul></li>
+<li>[0.2.1]
+<ul><li>released 2023-06-21</li>
+<li>updated sysdata (now using SPECIATE 5.2)</li>
+<li>added sp_find_species</li>
+<li>extended sp_find_profile; now searches by species_name</li>
+<li>simplified plot.respeciate</li>
+</ul></li>
+<li>[0.2.0]
+<ul><li>released 2021-05-26</li>
+<li>Karl Ropkins joined</li>
+<li>moved your sysdata.rda to the package data folder</li>
+<li>reset lazy.data to TRUE<br></li>
+<li>added an object class</li>
+<li>added sp_find_profile (find_code but making object class)</li>
+<li>added sp_profile (spec but making object class)</li>
+<li>added crude print and plot methods for object classes</li>
+<li>updated date and version</li>
+<li>The new code is in R:speciate.0.2.r</li>
+</ul></li>
+<li>[0.1.0]
+<ul><li>released 2020-12-20<br></li>
+<li>Created respeciate</li>
+</ul></li>
+</ul></div>
   </div>
 
   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
diff --git a/man/respeciate-package.Rd b/man/respeciate-package.Rd
index 7136d18..2dde598 100644
--- a/man/respeciate-package.Rd
+++ b/man/respeciate-package.Rd
@@ -8,7 +8,7 @@
 \description{
 \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
 
-Acess to the US.EPA Speciate (v5.2) tool, to generate speciation for gases and particles. More details in Simon et al (2010) \doi{10.5094/APR.2010.026}.
+Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for gases and particles. More details in Simon et al (2010) \doi{10.5094/APR.2010.026}.
 }
 \seealso{
 Useful links:
diff --git a/man/respeciate.generics.Rd b/man/respeciate.generics.Rd
index 657e516..f84fffd 100644
--- a/man/respeciate.generics.Rd
+++ b/man/respeciate.generics.Rd
@@ -4,36 +4,36 @@
 \alias{respeciate.generics}
 \alias{print.respeciate}
 \alias{plot.respeciate}
-\alias{print.respeciate.ref}
-\alias{print.respeciate.spcs}
+\alias{summary.respeciate}
 \title{respeciate.generics}
 \usage{
-\method{print}{respeciate}(x, n = 6, ...)
+\method{print}{respeciate}(x, n = NULL, ...)
 
 \method{plot}{respeciate}(x, n = NULL, id = NULL, order = TRUE, ...)
 
-\method{print}{respeciate.ref}(x, ...)
-
-\method{print}{respeciate.spcs}(x, ...)
+\method{summary}{respeciate}(object, ...)
 }
 \arguments{
-\item{x}{the \code{respeciate} or \code{respeciate.ref}
+\item{x}{the \code{respeciate}
 object to be printed, plotted, etc.}
 
-\item{n}{when plotting or printing a multi-profile, the
-number(s) of profile(s) to report.}
+\item{n}{when plotting or printing a multi-profile object, the
+maximum number of profiles to report. (When plotting, \code{n}
+is ignored if \code{id} is also set.)}
 
 \item{...}{any extra arguments, mostly ignored except by
 \code{plot} which passes them to \code{\link{barplot}}.}
 
 \item{id}{numeric, indices of profiles to use when
-plotting (nb: \code{n=6} is equivalent to code{id=1:6}).}
+plotting (\code{id=1:6} is equivalent to \code{n=6}).}
 
 \item{order}{logical, order the species in the
 profile(s) by relative abundance before plotting.}
+
+\item{object}{like \code{x} but for \code{summary}.}
 }
 \description{
-respeciate object classes and generic functions
+\code{respeciate} object classes and generic functions.
 
 When supplied a \code{respeciate}
 object, \code{\link{print}} manages its appearance.
@@ -42,9 +42,6 @@ When supplied a \code{respeciate}
 object, \code{\link{plot}} provides a basic plot
 output. This uses base function \code{\link{barplot}};
 also see note.
-
-When supplied a \code{respeciate.ref}
-object, \code{\link{print}} manages its appearance.
 }
 \note{
 \code{respeciate} objects revert to
diff --git a/man/sp.001.Rd b/man/sp.001.Rd
deleted file mode 100644
index 0184a9a..0000000
--- a/man/sp.001.Rd
+++ /dev/null
@@ -1,75 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sp.001.R
-\name{sp.001}
-\alias{sp.001}
-\alias{sp_find_profile}
-\alias{sp_find_species}
-\alias{sp_profile}
-\title{sp_ functions}
-\usage{
-sp_find_profile(..., by = "keywords", partial = TRUE)
-
-sp_find_species(..., by = "species_name", partial = TRUE)
-
-sp_profile(code)
-}
-\arguments{
-\item{...}{for \code{sp_find} functions, character(s), any
-search term(s) to use when searching the local (re)SPECIATE archive for
-relevant records.}
-
-\item{by}{character, the section of the archive to
-search, by default \code{'keywords'} for \code{\link{sp_find_profile}} and
-\code{'species_names'} for \code{\link{sp_find_species}}.}
-
-\item{partial}{logical, if \code{TRUE} (default)
-\code{sp_find} functions use partial matching.}
-
-\item{code}{character or numeric, the SPECIATE code
-of the required profile (EPA SPECIATE term PROFILE_CODE).}
-}
-\value{
-\code{sp_find_profile} returns a object of
-\code{respeciate.ref} class, a \code{data.frame} of
-profile information.
-
-\code{sp_find_species} returns a object of
-\code{respeciate.spcs} class, a \code{data.frame} of
-species information.
-
-\code{sp_profile} returns a object of
-\code{respeciate} class, a \code{data.frame} containing a
-speciate profile.
-}
-\description{
-sp functions for use with (re)speciate data in R...
-
-\code{sp_find} functions search main data sets
-in the (re)SPECIATE archive using supplied search terms.
-\code{\link{sp_find_profile}} searches for profile records and
-\code{\link{sp_find_species}} searches for species records.
-
-\code{\link{sp_profile}} extracts a
-SPECIATE profile from the local (re)SPECIATE archive.
-}
-\examples{
-\dontrun{
-profile <- "Ethanol"
-dt <- sp_find_profile(profile)
-dt}
-
-\dontrun{
-species <- "Ethanol"
-sp <- sp_find_species(species)
-sp}
-
-\dontrun{
-x <- sp_profile(c(8833, 8850))
-plot(x)}
-}
-\references{
-Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D.,
-Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010.
-The development and uses of EPA SPECIATE database.
-Atmospheric Pollution Research, 1(4), pp.196-206.
-}
diff --git a/man/sp.Rd b/man/sp.Rd
new file mode 100644
index 0000000..8f4b3e3
--- /dev/null
+++ b/man/sp.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.R
+\name{sp}
+\alias{sp}
+\alias{sp_profile}
+\title{sp_ functions}
+\usage{
+sp_profile(code, include.refs = FALSE)
+}
+\arguments{
+\item{code}{character, numeric or data.frame, the SPECIATE code
+of the required profile (EPA SPECIATE identifier PROFILE_CODE). This is
+typically one or concatenated character or numeric entries, but can also
+be a \code{respeciate} object or similar \code{data.frame} containing
+the \code{code}s as a named \code{PROFILE_NAME} column.}
+
+\item{include.refs}{logical, include profile reference information when
+getting the requested profile(s) from the archive, default \code{FALSE}.}
+}
+\value{
+\code{sp_profile} returns a object of
+\code{respeciate} class, a \code{data.frame} containing a
+(re)SPECIATE profile.
+}
+\description{
+sp function to get profiles from the R (re)SPECIATE archive
+
+\code{\link{sp_profile}} extracts a
+SPECIATE profile from the local (re)SPECIATE archive.
+}
+\note{
+The option \code{include.refs} adds profile source reference
+information to the returned \code{respeciate} data set. The default option
+is to not include these because some profiles have several associated
+references and including these replicates records, once per reference.
+\code{respeciate} code is written to handle this but if you are developing
+own methods or code and include references in any profile build you may be
+biasing some analyses in favor of those multiple-reference profile unless
+you check and account such cases.
+}
+\examples{
+\dontrun{
+x <- sp_profile(c(8833, 8850))
+plot(x)}
+}
+\references{
+Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D.,
+Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010.
+The development and uses of EPA SPECIATE database.
+Atmospheric Pollution Research, 1(4), pp.196-206.
+}
diff --git a/man/sp.average.Rd b/man/sp.average.Rd
new file mode 100644
index 0000000..a2a967c
--- /dev/null
+++ b/man/sp.average.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.average.R
+\name{sp.average}
+\alias{sp.average}
+\alias{sp_average_profile}
+\title{speciate data averaging functions}
+\usage{
+sp_average_profile(x, code = NULL, name = NULL, method = 1, ...)
+}
+\arguments{
+\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+profiles.}
+
+\item{code}{required character, the unique profile code to assign to the
+average profile.}
+
+\item{name}{character, the profile name to assign to the average
+profile. If not supplied, this defaults to a collapsed list of the codes
+of all the profiles averaged.}
+
+\item{method}{numeric, the averaging method to apply: Currently only 1 (default)
+\code{mean(x)}.}
+
+\item{...}{additional arguments, currently ignored}
+}
+\value{
+\code{sp_average_profile} returns a single profile average
+version of the supplied \code{respeciate} profile.
+}
+\description{
+Functions to build composite (re)SPECIATE profiles
+
+\code{sp_average_profile} generates an average composite
+of a supplied multi-profile \code{respeciate} object.
+}
+\note{
+In development function; arguments and outputs likely to be subject to
+change.
+
+This is one of the very few \code{respeciate} functions that modifies the
+\code{WEIGHT_PERCENT} column of the \code{respectiate} \code{data.frame}.
+}
diff --git a/man/sp.cluster.Rd b/man/sp.cluster.Rd
new file mode 100644
index 0000000..0e51b66
--- /dev/null
+++ b/man/sp.cluster.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.cluster.R
+\name{sp.cluster}
+\alias{sp.cluster}
+\alias{sp_profile_distance}
+\title{sp_profile clustering}
+\usage{
+sp_profile_distance(x, output = c("plot", "report"))
+}
+\arguments{
+\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+profiles.}
+
+\item{output}{Character vector, required function output: \code{'report'} the
+calculated distance matrix; \code{'plot'} a heat map of that distance
+matrix.}
+}
+\value{
+Depending on the \code{output} option, \code{sp_profile_distance} returns
+one or more of the following: the correlation matrix, a heat map of the
+correlation matrix.
+}
+\description{
+sp_profile functions for studying similarities (or
+dissimilarities) within multi-profile (re)SPECIATE data sets
+
+\code{\link{sp_profile_distance}} calculates the statistical distance
+between re(SPECIATE) profiles, and clusters profiles according to nearness.
+}
+\note{
+Please note: function in development; structure and arguments may be
+subject to change.
+}
diff --git a/man/sp.cor.Rd b/man/sp.cor.Rd
new file mode 100644
index 0000000..046f5e3
--- /dev/null
+++ b/man/sp.cor.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.cor.R
+\name{sp.cor}
+\alias{sp.cor}
+\alias{sp_species_cor}
+\title{(re)SPECIATE Species Correlations}
+\usage{
+sp_species_cor(
+  x,
+  min.n = 3,
+  cols = c("#80FFFF", "#FFFFFF", "#FF80FF"),
+  na.col = "#CFCFCF",
+  heatmap.args = TRUE,
+  key.args = TRUE,
+  report = "silent"
+)
+}
+\arguments{
+\item{x}{\code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+profiles.}
+
+\item{min.n}{\code{numeric} (default 3), the minimum number of species measurements
+needed in a profile for the function to use it in correlation calculations.
+Here, it should be noted that this does not guarantee the three matched
+pairs of measurements needed to calculate a correlation coefficient because
+not all profiles contain all species, so there may still be insufficient
+overlap on a case-by-case basis.}
+
+\item{cols}{a series of \code{numeric}, \code{character} or other class values
+that can be translated into a color gradient, used to color valid cases when
+generating plots and color keys, default \code{c("#80FFFF", "#FFFFFF", "#FF80FF")}
+equivalent to \code{\link{cm.colors}} output.}
+
+\item{na.col}{\code{numeric}, \code{character} or other class that can be
+translated into a single color, used to color \code{NA}s when generating
+plots and color keys, default grey \code{"#CFCFCF"}.}
+
+\item{heatmap.args}{\code{logical} or \code{list}, heat map settings. Options
+include \code{TRUE} (default) to generate the heat map without modification;
+\code{FALSE} to not plot it; or a list of heat map options to alter the plot
+default appearance. The plot, a standard heat map with the dendrograms
+removed, is generated using \code{\link[stats]{heatmap}}, so see associated
+documentation for valid options.}
+
+\item{key.args}{\code{logical} or \code{list}, color key settings if plotting
+the correlation matrix heat map. Options include \code{TRUE} (default) to
+generate the key without modification; \code{FALSE} to not include the key;
+or a list of options to alter the key appearance.}
+
+\item{report}{\code{logical} or \code{character}, the required function
+output. Options include: \code{'silent'} (default), to return the
+correlation matrix invisibly; \code{TRUE} to return the matrix
+(visibly); and, \code{FALSE} to not return it.}
+}
+\value{
+By default \code{sp_species_cor} invisibly returns the calculated
+correlation matrix a plots it as a heat map, but arguments including
+\code{heatmap} and \code{report} can be used to modify function outputs.
+}
+\description{
+sp_species functions for studying relationships between
+species in multi-profile (re)SPECIATE data sets.
+
+\code{\link{sp_species_cor}} generates a by-species correlation
+matrix of the supplied (re)SPECIATE data sets.
+}
diff --git a/man/sp.info.Rd b/man/sp.info.Rd
new file mode 100644
index 0000000..8126699
--- /dev/null
+++ b/man/sp.info.Rd
@@ -0,0 +1,63 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.info.R
+\name{sp.info}
+\alias{sp.info}
+\alias{sp_info}
+\alias{sp_profile_info}
+\alias{sp_species_info}
+\alias{sp_find_profile}
+\alias{sp_find_species}
+\title{re(SPECIATE) information}
+\usage{
+sp_info()
+
+sp_profile_info(..., by = "keywords", partial = TRUE)
+
+sp_find_profile(...)
+
+sp_species_info(..., by = "species_name", partial = TRUE)
+
+sp_find_species(...)
+}
+\arguments{
+\item{...}{character(s), any search term(s) to use when searching
+the local (re)SPECIATE archive for relevant records using
+\code{sp_profile_info} or \code{sp_species_info}.}
+
+\item{by}{character, the section of the archive to
+search, by default \code{'keywords'} for \code{sp_profile_info} and
+\code{'species_names'} for \code{sp_species_info}.}
+
+\item{partial}{logical, if \code{TRUE} (default)
+\code{sp_profile_info} or \code{sp_profile_info} use partial matching.}
+}
+\value{
+\code{sp_info} provides a brief version information report on the
+currently installed (re)SPECIATE archive.
+
+\code{sp_profile_info} returns a \code{data.frame} of
+profile information, as a \code{respeciate} object.
+\code{sp_species_info} returns a \code{data.frame} of
+species information as a \code{respeciate} object.
+}
+\description{
+Functions that provide (re)SPECIATE
+source information.
+\code{sp_info} generates a brief version report for the currently installed
+(re)SPECIATE data sets.
+\code{sp_profile_info} searches the currently installed (re)SPECIATE
+data sets for profile records.
+\code{sp_species_info} searches the currently installed (re)SPECIATE
+data sets for species records.
+}
+\examples{
+\dontrun{
+profile <- "Ethanol"
+pr <- sp_find_profile(profile)
+pr
+
+species <- "Ethanol"
+sp <- sp_find_species(species)
+sp}
+
+}
diff --git a/man/sp.match.Rd b/man/sp.match.Rd
new file mode 100644
index 0000000..d742325
--- /dev/null
+++ b/man/sp.match.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.match.R
+\name{sp.match}
+\alias{sp.match}
+\alias{sp_match_profile}
+\title{Find nearest matches from reference set of profiles}
+\usage{
+sp_match_profile(x, ref, matches = 10, rescale = 5, min.n = 8, test.x = FALSE)
+}
+\arguments{
+\item{x}{A \code{respeciate} object or similar \code{data.frame} containing
+a species profile to be compared with profiles in \code{ref}. If \code{x}
+contains more than one profile, these are averaged (using
+\code{\link{sp_average_profile}}), and the average compared.}
+
+\item{ref}{A \code{respeciate} object, a \code{data.frame} containing a
+multiple species profiles, to be used as reference library when identifying
+nearest matches for \code{x}.}
+
+\item{matches}{Numeric (default 10), the maximum number of profile matches to
+report.}
+
+\item{rescale}{Numeric (default 5), the data scaling method to apply before
+comparing \code{x} and profiles in \code{ref}: options 0 to 5 handled by
+\code{\link{sp_rescale}}.}
+
+\item{min.n}{\code{numeric} (default 8), the minimum number of paired
+species measurements in two profiles required for a match to be assessed.
+See also \code{\link{sp_species_cor}}.}
+
+\item{test.x}{Logical (default FALSE). The match process self-tests by adding
+\code{x} to \code{ref}, which should generate a perfect fit=1 score. Setting
+\code{test.x} to \code{TRUE} retains this as an extra record.}
+}
+\value{
+\code{sp_match_profile} returns a fit report: a \code{data.frame} of
+up to \code{n} fit reports for the nearest matches to \code{x} from the
+reference profile data set, \code{ref}.
+}
+\description{
+\code{sp_match_profile} compares a supplied species
+(re)SPECIATE profile and a reference set of supplied profiles and
+attempt to identify nearest matches on the basis of correlation
+coefficient.
+}
diff --git a/man/sp.pad.Rd b/man/sp.pad.Rd
new file mode 100644
index 0000000..ecdbb4e
--- /dev/null
+++ b/man/sp.pad.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.pad.R
+\name{sp.pad}
+\alias{sp.pad}
+\alias{sp_pad}
+\title{(re)SPECIATE profile padding functions}
+\usage{
+sp_pad(x, pad = "standard", drop.nas = TRUE)
+}
+\arguments{
+\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+profiles.}
+
+\item{pad}{character, type of meta data padding, current options
+\code{'profile'}, \code{'species'}, \code{'weight'}, \code{reference},
+\code{'standard'} (default; all but \code{'reference'}), and \code{'all'}
+('all').}
+
+\item{drop.nas}{logical, discard any rows where \code{WEIGHT_PERCENT} is
+\code{NA}, default \code{TRUE}.}
+}
+\value{
+\code{sp_pad} returns \code{x}, with requested additional profile
+and species meta-data added as additional \code{data.frame} columns.
+See Note.
+}
+\description{
+Functions for padding \code{respeciate} objects.
+
+\code{sp_pad} pads a supplied (re)SPECIATE profile data set
+with profile and species meta-data.
+}
+\note{
+Some data handling can remove (re)SPECIATE meta-data,
+and \code{sp_pad}s provide a quick rebuild/repair. For example,
+\code{\link{sp_dcast}}ing to a (by-species or by-profile) widened
+form strips some meta-data, and padding is used as part of the
+\code{\link{sp_melt_wide}} and padding is used to re-add this meta-data
+when returning the data set to its standard long form.
+}
diff --git a/man/sp.pls.Rd b/man/sp.pls.Rd
new file mode 100644
index 0000000..0180a9f
--- /dev/null
+++ b/man/sp.pls.Rd
@@ -0,0 +1,122 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.pls.R
+\name{sp.pls}
+\alias{sp.pls}
+\alias{sp_pls_profile}
+\alias{pls_report}
+\alias{pls_refit_species}
+\alias{pls_fit_parent}
+\alias{pls_plot}
+\alias{pls_plot_species}
+\alias{pls_plot_profile}
+\title{(re)SPECIATE profile Positive Least Squares}
+\usage{
+sp_pls_profile(x, ref, power = 1, ...)
+
+pls_report(pls)
+
+pls_refit_species(pls, name, power = 1, ...)
+
+pls_fit_parent(
+  pls,
+  parent,
+  power = 1,
+  start = 100,
+  lower = 50,
+  upper = 200,
+  ...
+)
+
+pls_plot(pls, species = 1, type = 1, ...)
+
+pls_plot_species(pls, species, type = 1, ...)
+
+pls_plot_profile(pls, profile, log = FALSE, ...)
+}
+\arguments{
+\item{x}{A \code{respeciate} object, a \code{data.frame} of
+profiles in standard long form, intended for PLS modelling.}
+
+\item{ref}{A \code{respeciate} object, a \code{data.frame} of
+profiles also in standard long form, used as the set of candidate
+source profiles when fitting \code{x}.}
+
+\item{power}{A numeric, an additional factor to be added to
+weightings when fitting the PLS model. This is applied in the form
+\code{weight^power}, and increasing this, increases the relative
+weighting of the more heavily weighted measurements. Values in the
+range \code{1 - 2.5} are sometimes helpful.}
+
+\item{...}{additional arguments, currently ignored.}
+
+\item{pls}{A \code{sp_pls_profile} output, only used by \code{pls_}
+functions.}
+
+\item{name}{Character class, for \code{pls_refit_species} only, the
+name of the species to refit. All other species in the reference
+profiles are held 'as is' and the \code{name}d species is refit based on
+previous PLS estimated source contribution time-series, then these are
+in turn refit using the new profiles generated by re-distributing the
+\code{name}d species.}
+
+\item{parent}{(for \code{pls_fit_parent} only) a data.frame of measurements
+of an additional species. This is treated as the sample parent (or a proxy
+for this), e.g. the total particulate matter (PM) measurement for
+(percentage) PM profiles. It is added to \code{x}, dummy entries are added
+to \code{ref} with a start value of 100 (percent), and the model is refitted.}
+
+\item{start, lower, upper}{all numeric, the start value and lower and upper
+boundaries when fitting a parameter.}
+
+\item{species, profile}{(for \code{pls_plot}s only) numeric or character
+identifying the species or profile to plot. If numeric, these are treated
+as indices of the species or profile, respectively, in the PLS model; if
+character, species is treated as the name of species and profile is treated
+as the profile code. Both can be concatenation to product multiple plots and
+the special case \code{-1} is a short cut to all species or profiles,
+respectively.}
+
+\item{type}{(for \code{pls_plot}s only) numeric, the plot type if
+multiple options are available.}
+
+\item{log}{(for \code{pls_plot_profile} only) logical, if \code{TRUE} this
+applies 'log' scaling to the primary Y axes of the plot.}
+}
+\value{
+\code{sp_pls_profile} returns a list of nls models, one per
+profile/measurement set in \code{x}. The \code{pls_} functions work with
+these outputs. \code{pls_report} generates a \code{data.frame} of
+model outputs, and is used of several of the other \code{pls_}
+functions. \code{pls_refit_species} and \code{pls_fit_parent}
+return the supplied \code{sp_pls_profile} output, updated on the
+basis of the \code{pls_} function action. \code{pls_plot}s produce
+various plots commonly used in source apportionment studies.
+}
+\description{
+Functions for Positive Least Squares (PSL) fitting of
+(re)SPECIATE profiles
+
+\code{sp_pls_profile} builds PSL models for supplied profile(s) using
+the \code{\link{nls}} function, the 'port' algorithm and a lower
+limit of zero for all model outputs to enforce the positive fits. The
+modeled profiles are typically from an external source, e.g. a
+measurement campaign, and are fit as a linear additive series of reference
+profiles, here typically from (re)SPECIATE, to provide a measure of
+source apportionment based on the assumption that the profiles in the
+reference set are representative of the mix that make up the modeled
+sample. The \code{pls_} functions work with \code{sp_pls_profile}
+outputs, and are intended to be used when refining and analyzing
+these PLS models. See
+}
+\note{
+This implementation of PLS applies the following modeling constraints:
+
+1. The generate a model of \code{x} that is positively constained linear
+product of the profiles in \code{ref}, so outputs can only be
+zero or more.  Although the model is generated using \code{\link{nls}},
+which is a Nonlinear Least Squares (NLS) model, the fitting term applied
+in this case is linear.
+
+2. The number of species in \code{x} must be more that the number of
+profiles in \code{ref}, to reduce the likelihood of over-fitting.
+}
diff --git a/man/sp.rescale.Rd b/man/sp.rescale.Rd
new file mode 100644
index 0000000..a04852a
--- /dev/null
+++ b/man/sp.rescale.Rd
@@ -0,0 +1,59 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.rescale.R
+\name{sp.rescale}
+\alias{sp.rescale}
+\alias{sp_rescale}
+\alias{sp_rescale_profile}
+\alias{sp_rescale_species}
+\title{(re)SPECIATE profile rescaling functions}
+\usage{
+sp_rescale(x, method = 2, by = "species")
+
+sp_rescale_profile(x, method = 1, by = "profile")
+
+sp_rescale_species(x, method = 2, by = "species")
+}
+\arguments{
+\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+profiles.}
+
+\item{method}{numeric, the rescaling method to apply:
+  1 \code{x/total(x)};
+  2 \code{x/mean(x)};
+  3 \code{x-min(x)/max(x)-min(x)};
+  4 \code{x-mean(x)/sd(x)};
+  5 \code{x/max(x)}.
+The alternative 0 returns the records to their original
+values.}
+
+\item{by}{character, when rescaling \code{x} with
+\code{\link{sp_rescale}}, the data type to group and rescale,
+currently \code{'species'} (default) or \code{'profile'}.}
+}
+\value{
+\code{sp_rescale} and \code{sp_rescale} return the
+\code{respeciate} profile with the percentage weight records rescaled using
+the requested method. See Note.
+}
+\description{
+Functions for rescaling
+
+\code{sp_rescale} rescales the percentage weight records in
+a supplied (re)SPECIATE profile data set. This can be by profile or species
+subsets, and \code{sp_rescale_profile} and \code{sp_rescale_species} provide
+short-cuts to these options.
+}
+\note{
+Data sometimes needs to be normalised, e.g. when applying some
+statistical analyses. Rather than modify the EPA records in the
+\code{WEIGHT_PERCENT} column, \code{respeciate} creates a duplicate column
+\code{.value} which is modified by operations like \code{sp_rescale_profile}
+and \code{sp_rescale_species}. This means rescaling is always applied to
+the source information, rather than rescaling an already rescaled value,
+and the EPA records are retained unaffected. So, the original source
+information can be easily recovered.
+}
+\references{
+Dowle M, Srinivasan A (2023). data.table: Extension of `data.frame`.
+  R package version 1.14.8, \url{https://CRAN.R-project.org/package=data.table}.
+}
diff --git a/man/sp.reshape.Rd b/man/sp.reshape.Rd
new file mode 100644
index 0000000..01f472f
--- /dev/null
+++ b/man/sp.reshape.Rd
@@ -0,0 +1,79 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sp.reshape.R
+\name{sp.reshape}
+\alias{sp.reshape}
+\alias{sp_dcast}
+\alias{sp_dcast_profile}
+\alias{sp_dcast_species}
+\alias{sp_melt_wide}
+\title{(re)SPECIATE profile reshaping functions}
+\usage{
+sp_dcast(x, widen = "species")
+
+sp_dcast_profile(x, widen = "profile")
+
+sp_dcast_species(x, widen = "species")
+
+sp_melt_wide(x, pad = TRUE, drop.nas = TRUE)
+}
+\arguments{
+\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+profiles in standard long form or widened form for
+\code{\link{sp_dcast}} and \code{\link{sp_melt_wide}}, respectively.}
+
+\item{widen}{character, when widening \code{x} with
+\code{\link{sp_dcast}}, the data type to \code{dcast},
+currently \code{'species'} (default) or \code{'profile'}. See Note.}
+
+\item{pad}{logical or character, when \code{melt}ing a previously widened
+data set, should output be re-populated with species and/or profile
+meta-data, discarded when widening. This is currently handled by
+\code{\link{sp_pad}}. The default \code{TRUE} applies standard settings,
+so does not include profile sources reference meta-data. (See
+\code{\link{sp_pad}} for other options).}
+
+\item{drop.nas}{logical, when \code{melt}ing a previously widened
+data set, should output be stripped of any rows containing empty
+weight/value columns. Because not all profile contains all species, the
+\code{dcast}/\code{melt} process can generate empty rows, and this step
+attempt account for that when working with standard re(SPECIATE)
+profiles. It is, however, sometimes useful to check first, e.g. when
+building profiles yourself.}
+}
+\value{
+\code{sp_dcast} returns the wide form of the supplied
+\code{respeciate} profile. \code{sp_melt_wide}
+returns the (standard) long form of a previously widened profile.
+}
+\description{
+Functions for reshaping (re)SPECIATE profiles
+
+\code{sp_dcast} and \code{sp_melt_wide} reshape supplied
+(re)SPECIATE profile(s). \code{sp_dcast} converts these from their supplied
+long form to a widened form, \code{dcast}ing the data set by either species
+or profiles depending on the \code{widen} setting applied.
+\code{sp_dcast_profile} and \code{sp_dcast_species} are wrappers for these
+options. \code{sp_melt_wide} attempts to return a previously widened data
+set to the original long form.
+}
+\note{
+Conventional long-to-wide reshaping of data, or \code{dcast}ing, can
+be slow and memory inefficient. So, \code{respeciate} uses the
+\code{\link[data.table:dcast]{data.table::dcast}}
+method. The \code{sp_dcast_species} method,
+applied using \code{widen='species'}, is effectively:
+
+\code{dcast(..., PROFILE_CODE+PROFILE_NAME~SPECIES_NAME, value.var="WEIGHT_PERCENT")}
+
+And, the alternative \code{widen='profile'}:
+
+\code{dcast(..., SPECIES_ID+SPECIES_NAME~PROFILE_CODE, value.var="WEIGHT_PERCENT")}
+
+Although, \code{respeciate} uses a local version of \code{WEIGHT_PERCENT} called
+\code{.value}, so the EPA source information can easily be recovered. See also
+\code{\link{sp_rescale_profile}}.
+}
+\references{
+Dowle M, Srinivasan A (2023). _data.table: Extension of `data.frame`_.
+  R package version 1.14.8, <https://CRAN.R-project.org/package=data.table>.
+}
diff --git a/man/spq.Rd b/man/spq.Rd
new file mode 100644
index 0000000..61a4a3c
--- /dev/null
+++ b/man/spq.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/spq.R
+\name{spq}
+\alias{spq}
+\alias{spq_gas}
+\alias{spq_other}
+\alias{spq_pm}
+\alias{spq_pm.ae6}
+\alias{spq_pm.ae8}
+\alias{spq_pm.cr1}
+\alias{spq_pm.simplified}
+\title{spq_ quick access to common re(SPECIATE) sub-samples}
+\usage{
+spq_gas()
+
+spq_other()
+
+spq_pm()
+
+spq_pm.ae6()
+
+spq_pm.ae8()
+
+spq_pm.cr1()
+
+spq_pm.simplified()
+}
+\value{
+\code{spq_} functions typically return a \code{respeciate}
+\code{data.frame} of the requested profiles.
+
+For example:
+
+\code{sqr_gas} returns all gaseous profiles (\code{PROFILE_TYPE == 'GAS'}).
+
+\code{sqr_pm} returns all particulate matter (PM) profiles not classified
+as a special PM type (\code{PROFILE_TYPE == 'PM'}).
+
+The special PM types are subsets profiles intended for special
+applications, and these include \code{sqr_pm.ae6} (type \code{PM-AE6}),
+\code{sqr_pm.ae8} (type \code{PM-AE8}), \code{sqr_pm.cr1} (type
+\code{PM-CR1}), \code{sqr_pm.simplified} (type \code{PM-Simplified})
+and \code{sqr_other} (\code{PROFILE_TYPE == 'OTHER'}).
+}
+\description{
+\code{spq_} functions are quick access wrappers to commonly
+requested re(SPECIATE) sub-samples.
+}
diff --git a/man/spx.Rd b/man/spx.Rd
new file mode 100644
index 0000000..f520964
--- /dev/null
+++ b/man/spx.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/spx.R
+\name{spx}
+\alias{spx}
+\alias{spx_copy}
+\alias{spx_}
+\alias{spx_n_alkane}
+\alias{spx_btex}
+\title{spx_ functions for grouping and subsetting}
+\usage{
+spx_copy(x, ref = NULL, by = "species_id")
+
+spx_n_alkane(x)
+
+spx_btex(x)
+}
+\arguments{
+\item{x}{a \code{respeciate} object, a \code{data.frame} of re(SPECIATE)
+profiles.}
+
+\item{ref}{(\code{spx_copy} only) a second \code{respeciate} object, to
+be used as reference when testing \code{x}.}
+
+\item{by}{(\code{spx_copy} only) character, the name of the column
+in \code{ref} to copy when testing \code{x}.}
+}
+\value{
+\code{spx_copy} outputs can be modified but, by default, it
+identifies all species in the supplied reference data set.
+
+\code{spx_n_alkane} identifies C1 to C40 n-alkanes.
+
+\code{spx_btex} identifies the BTEX group of aromatic hydrocarbons
+(benzene, toluene, ethyl benzene, and M-, O- and P-xylene).
+}
+\description{
+\code{spx_} functions generate a vector of assignment
+terms and can be used to subset or condition a supplied re(SPECIATE)
+\code{data.frame}.
+
+Most commonly, the \code{spx_} functions accept a single input, a
+re(SPECIATE) \code{data.frame} and return a logical vector of
+length \code{nrow(x)}, identifying species of interest as
+\code{TRUE}. So, for example, they can be used when
+\code{\link{subset}}ting in the form:
+
+\code{subset(x, spx_n_alkane(x))}
+
+... to extract just n-alkane records from a \code{respeciate} object
+\code{x}.
+
+However, some accept additional arguments. For example, \code{spx_copy}
+also accepts a reference data set, \code{ref}, and a column identifier,
+\code{by}, and tests \code{x$by \%in\% unique(ref$by)}.
+}
diff --git a/man/sysdata.Rd b/man/sysdata.Rd
index 3aa075d..b0af5e1 100644
--- a/man/sysdata.Rd
+++ b/man/sysdata.Rd
@@ -1,18 +1,43 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/sysdata.R
+\docType{data}
 \name{sysdata}
 \alias{sysdata}
 \title{sysdata}
 \format{
-A ( 5  long) 'list' object
+A (long) list, containing:
 \describe{
-  \item{PROFILES}{[to document]}
-  \item{SPECIES}{[to document]}
-  \item{SPECIES_PROPERTIES}{[to document]}
-  \item{PROFILE_REFERENCE}{[to document]}
-  \item{REFERENCES}{[to document]}
+  \item{PROFILES}{The main \code{data.frame} of profile-specific meta-data,
+  with one row per profile, key term \code{PROFILE_CODE}.}
+  \item{SPECIES}{The main \code{data.frame} of individual record meta-data,
+  with one row per species in each profile, key terms \code{PROFILE_CODE}
+  and \code{SPECIES_ID} linking \code{PROFILES} and
+  \code{SPECIES_PROPERTIES}.}
+  \item{SPECIES_PROPERTIES}{The main \code{data.frame} of species-specific
+  meta-data, with one row per species, key term \code{SPECIES_ID}.}
+  \item{PROFILE_REFERENCE}{The \code{data.frame} linking profile and
+  reference meta-data, one row per references per profile, key terms
+  \code{PROFILE_CODE} and \code{REF_Code}.}
+  \item{REFERENCES}{The main \code{data.frame} of references for profile
+  source meta-data, one row per reference, key term \code{REF_Code}.}
+  \item{And others}{Currently not documented.}
 }
 }
+\source{
+https://www.epa.gov/air-emissions-modeling/speciate
+}
+\usage{
+sysdata
+}
 \description{
-splat_packed data [to be described]
+the re(SPECIATE) sysdata, a local versions of SPECIATE,
+the EPA's repository of organic gas and particulate matter (PM) speciation
+profiles of air pollution sources.
+}
+\references{
+Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D.,
+Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010.
+The development and uses of EPA SPECIATE database.
+Atmospheric Pollution Research, 1(4), pp.196-206.
 }
+\keyword{datasets}