diff --git a/DESCRIPTION b/DESCRIPTION
index d4c9df46..3d86a5d9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -15,13 +15,15 @@ Authors@R: c(person(given = "Laurent", family = "Gatto",
                     email = "mail@sebastiangibb.de",
                     role = c("aut", "cre"),
 		            comment = c(ORCID = "0000-0001-7406-4443")),
-             person(given = "Sigurdur", family = "Smarason", role = "ctb")
+             person(given = "Sigurdur", family = "Smarason", role = "ctb"),
+	     person(given = "Thomas", family = "Naake", role = "ctb")
             )
 Author: Laurent Gatto, Johannes Rainer and Sebastian Gibb.
 Maintainer: Laurent Gatto <laurent.gatto@uclouvain.be>
 Depends:
     R (>= 3.6.0)
 Imports:
+    igraph,
     methods,
     S4Vectors
 Suggests:
diff --git a/NAMESPACE b/NAMESPACE
index b6ed691d..6283e17e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,12 +10,15 @@ export(coefMA)
 export(coefSG)
 export(coefWMA)
 export(common)
+export(dotproduct)
+export(graphPeaks)
 export(join)
 export(localMaxima)
 export(noise)
 export(ppm)
 export(rbindFill)
 export(refineCentroids)
+export(shiftMatrix)
 export(smooth)
 export(valleys)
 export(vapply1c)
@@ -24,6 +27,8 @@ export(vapply1l)
 importClassesFrom(S4Vectors,Rle)
 importFrom(S4Vectors,Rle)
 importFrom(S4Vectors,nrun)
+importFrom(igraph,components)
+importFrom(igraph,graph_from_adjacency_matrix)
 importFrom(methods,as)
 importFrom(methods,is)
 importFrom(stats,filter)
diff --git a/NEWS.md b/NEWS.md
index 5b1b34f5..9eab4988 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,7 @@
 
 - Add `asInteger` and `rbindFill`.
 - Add `asRle`, `asRleDataFrame` and `asVectorDataFrame`.
+- Add `dotproduct`
 
 ## MsCoreUtils 0.0.1
 
diff --git a/R/dotproduct.R b/R/dotproduct.R
new file mode 100644
index 00000000..d6e5a8b4
--- /dev/null
+++ b/R/dotproduct.R
@@ -0,0 +1,102 @@
+#' @title Calculate the normalized dot product
+#' 
+#' @description 
+#' Calculate the normalized dot product (NDP).`dotproduct` returns a numeric 
+#' value ranging between 0 and 1, where 0 indicates no similarity between the 
+#' two MS/MS features, while 1 indicates that the MS/MS features are identical. 
+#' 
+#' @param 
+#' x `matrix` with two column where one contains m/z values (column `"mz"`) and 
+#' the second corresponding intensity values (column `"intensity"`)
+#' 
+#' @param 
+#' y `matrix` with two column where one contains m/z values (column `"mz"`) and 
+#' the second corresponding intensity values (column `"intensity"`)
+#' 
+#' @param m `numeric(1)`, exponent for peak intensity-based weights
+#' 
+#' @param n `numeric(1)`, exponent for m/z-based weights
+#' 
+#' @details 
+#' Each row in `x` corresponds to the respective row in `y`, i.e. the peaks 
+#' (entries `"mz"`) per spectrum have to match.
+#' 
+#' `m` and `n` are weights given on the peak intensity and the m/z values 
+#' respectively. As default (`m = 0.5`), the square root of the intensity 
+#' values are taken to calculate weights. With increasing values for `m`, high
+#' intensity values become more important for the similarity calculation, 
+#' i.e. the differences between intensities will be aggravated. 
+#' With increasing values for `n`, high m/z values will be taken more into 
+#' account for similarity calculation. Especially when working with small 
+#' molecules, a value `n > 0` can be set, to give a weight on the m/z values to 
+#' accommodate that shared fragments with higher m/z are less likely and will 
+#' mean that molecules might be more similar. If `n != 0`, a warning will be 
+#' raised if the corresponding m/z values are not identical, since small 
+#' differences in m/z values will distort the similarity values with increasing
+#' `n`. If `m=0` or `n=0`, intensity values or m/z values, respectively, are not 
+#' taken into account.
+#' 
+#' The normalized dot product is calculated according to: 
+#' \deqn{NDP = \frac{\sum(W_{S1, i} \cdot W_{S2, i}) ^ 2}{ \sum(W_{S1, i} ^ 2) * \sum(W_{S2, i} ^ 2) }}{\sum(W_{S1, i} \cdot W_{S2, i}) ^ 2 \sum(W_{S1, i} ^ 2) * \sum(W_{S2, i} ^ 2)},
+#' with \eqn{W = [ peak intensity] ^{m} \cdot [m/z]^n}. 
+#' For further information on normalized dot product see for example
+#' Li et al. (2015).
+#' Prior to calculating \deqn{W_{S1}} or \deqn{W_{S2}}, all intensity values 
+#' are divided by the maximum intensity value and multiplied by 100.
+#' 
+#' @references 
+#' Li et al. (2015): Navigating natural variation in herbivory-induced
+#' secondary metabolism in coyote tobacco populations using MS/MS structural 
+#' analysis. PNAS, E4147--E4155, DOI: 10.1073/pnas.1503106112.
+#' 
+#' @return 
+#' `numeric(1)`, `dotproduct` returns a numeric similarity coefficient between 
+#' 0 and 1.
+#' 
+#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}
+#' 
+#' @examples 
+#' x <- matrix(c(c(100.002, 100.001, NA, 300.01, 300.02, NA), 
+#'         c(2, 1.5, 0, 1.2, 0.9, 0)), ncol = 2,)
+#' y <- matrix(c(c(100.0, NA, 200.0, 300.002, 300.025, 300.0255),
+#'         c(2, 0, 3, 1, 4, 0.4)), ncol = 2)       
+#' colnames(x) <- colnames(y) <- c("mz", "intensity")
+#' dotproduct(x, y, m = 0.5, n = 0) 
+#' 
+#' @export
+dotproduct <- function(x, y, m = 0.5, n = 0) {
+    
+    ## check valid input 
+    if (!is.matrix(x)) stop("'x' is not a matrix")
+    if (!is.matrix(y)) stop("'y' is not a matrix")
+    
+    if (nrow(x) != nrow(y)) stop("nrow(x) and nrow(y) are not identical")
+    if (!is.numeric(m) || length(m) != 1) 
+        stop("`m` has to be a numeric of length 1.")
+    if (!is.numeric(n) || length(n) != 1) 
+        stop("`n` has to be a numeric of length 1.")
+    
+    ## retrieve m/z and intensity from x and y
+    mz1 <- x[, "mz"]
+    mz2 <- y[, "mz"]
+    inten1 <- x[, "intensity"]
+    inten2 <- y[, "intensity"]
+    
+    ## check mz values: if mz1 and mz2 are not identical and the values are
+    ## weighted by n, this might to unexpected results in the similarity
+    ## calculation
+    if (n && any(mz1 != mz2, na.rm = TRUE)) 
+        warning("m/z values in `x` and `y` are not identical. ",
+            "For n != 0 this might yield unexpected results.")    
+        
+    inten1 <- inten1 / max(inten1, na.rm = TRUE) * 100
+    inten2 <- inten2 / max(inten2, na.rm = TRUE) * 100
+    
+    ws1 <- inten1 ^ m * mz1 ^ n
+    ws2 <- inten2 ^ m * mz2 ^ n
+    
+    ## calculate normalized dot product 
+    dp <- sum(ws1 * ws2, na.rm = TRUE)
+    dp ^ 2 / (sum(ws1 ^ 2, na.rm = TRUE) * sum(ws2 ^ 2, na.rm = TRUE))
+}
+
diff --git a/R/graphPeaks.R b/R/graphPeaks.R
new file mode 100644
index 00000000..581f9e9d
--- /dev/null
+++ b/R/graphPeaks.R
@@ -0,0 +1,337 @@
+#' @importFrom igraph graph_from_adjacency_matrix components
+
+#' @name graphPeaks
+#' 
+#' @title
+#' Match two spectra using bipartite networks and combinatorics
+#' 
+#' @description
+#' Function to match spectrum objects using bipartite networks and combinations
+## to match peaks. `graphPeaks` takes two objects, `x` and 
+#' `y` as input that contain spectral information. The matching 
+#' is a multi-step procedure: 
+#' 1) filtering based on `ppm`,
+#' 2) retain order of matches between features of `x` and 
+#' `y` (remove crossing edges that violate the order of matching
+#' m/z),
+#' 3) calculate all combinations of the remaining possibilities.  
+#' 
+#' @param
+#' x `matrix`, the first column (`"mz"`) contains m/z value and the 
+#' second column (`"intensity"`) contains the corresponding intensity values 
+#'                         
+#' @param
+#' y `matrix`, the first column (`"mz"`) contains m/z value and the 
+#' second column (`"intensity"`) contains the corresponding intensity values
+#'                      
+#' @param
+#' ppm `numeric`, tolerance parameter in ppm to match corresponding peaks
+#' 
+#' @param 
+#' FUN function to calculate similarity between spectra
+#' 
+#' @param
+#' ... additional parameters passed to `FUN`
+#' 
+#' @details
+#' Objective function is highest similarites between the two 
+#' spectral objects, i.e. `FUN` is calculated over all combinations and 
+#' the similarity of the combination that yields the highest similarity is 
+#' returned. 
+#' 
+#' @return
+#' list with elements `x` and `y` each being a matrix with columns `"mz"` 
+#' and `"intensity"`. Each row (peak) in `x` matches the row (peak) in `y`
+#' 
+#' @author
+#' Thomas Naake \email{thomasnaake@@googlemail.com}
+#' 
+#' @examples
+#' x <- matrix(c(c(100.001, 100.002, 300.01, 300.02),
+#'     c(1, 1, 1, 1)), ncol = 2, nrow = 4, byrow = FALSE)
+#' colnames(x) <- c("mz", "intensity")
+#' 
+#' y <- matrix(c(c(100.0, 200.0, 300.002, 300.025, 300.0255),
+#'     c(1, 1, 1, 1, 1)), ncol = 2, nrow = 5, byrow = FALSE)
+#' colnames(y) <- c("mz", "intensity")
+#' graphPeaks(x = x, y = y, ppm = 20, FUN = dotproduct)
+#' @export
+graphPeaks <- function(x, y, ppm = 20, FUN = dotproduct, ...) {
+    
+    if (!is.matrix(x)) stop("x is not a matrix")
+    if (!is.matrix(y)) stop("y is not a matrix")
+    if (mode(x) != "numeric") stop("mode(x) is not 'numeric'")
+    if (mode(y) != "numeric") stop("mode(y) is not 'numeric'")
+    if (!all(colnames(x) %in% c("mz", "intensity"))) stop("colnames(x) are not 'mz' and 'intensity'")
+    if (!all(colnames(y) %in% c("mz", "intensity")))stop("colnames(y) are not 'mz' and 'intensity'")
+    if (!is.numeric(ppm)) stop("ppm is not numeric")
+    if (ppm < 0) stop("ppm has to be positive")
+    
+    
+    ## re-set colnames for x and y and order
+    rownames(x) <- paste("sp1_", 1:nrow(x),sep = "")
+    rownames(y) <- paste("sp2_", 1:nrow(y),sep = "")
+    if (nrow(x) > 1) x <- x[order(x[, 1]), ]
+    if (nrow(y) > 1) y <- y[order(y[, 1]), ]
+    
+    ## 1) create adjacency matrix and remove edges within x and 
+    ## within y
+    w <- matrix(1, ncol = nrow(x) + nrow(y), 
+                nrow = nrow(x) + nrow(y),
+                dimnames = list(c(rownames(x), rownames(y)),
+                              c(rownames(x), rownames(y))
+                ))
+    
+    ## 2) remove edges that are not in a certain range
+    ## calculate upper and lower m/z based on ppm parameter
+    ppm_1_1 <- x[, 1] / abs(ppm / 10 ^ 6  - 1)
+    ppm_1_2 <- x[, 1] / abs(ppm / 10 ^ 6  + 1)
+    ppm_2_1 <- y[, 1] / abs(ppm / 10 ^ 6  - 1)
+    ppm_2_2 <- y[, 1] / abs(ppm / 10 ^ 6  + 1)
+    
+    ## set names 
+    names(ppm_1_1) <- rownames(x)
+    names(ppm_1_2) <- rownames(x)
+    names(ppm_2_1) <- rownames(y)
+    names(ppm_2_2) <- rownames(y)
+    
+    mat1 <- apply(as.matrix(ppm_1_2), 1, function(a) a <= c(ppm_1_1, ppm_2_1))
+    mat2 <- apply(as.matrix(ppm_1_1), 1, function(a) a >= c(ppm_1_2, ppm_2_2))
+    
+    link_ppm <- mat1 * mat2
+    w[rownames(link_ppm), colnames(link_ppm)] <- link_ppm
+    w[colnames(link_ppm), rownames(link_ppm)] <- t(link_ppm)
+    w[rownames(x), rownames(x)] <- 0
+    w[rownames(y), rownames(y)] <- 0
+    
+    ## obtain network components from w
+    net <- graph_from_adjacency_matrix(w, weighted = NULL, mode = "undirected")
+    comp <- components(net)
+    
+    ## 3) get all possible combinations within one component
+    ## res will contain all combinations within one component
+    res <- vector("list", length(comp$csize)) 
+    
+    ## write to res combinations where csize == 2
+    inds_1 <- which(comp$csize == 1)
+    res[inds_1] <- lapply(inds_1, function(a) {
+        ms <- names(which(comp$membership == a))
+        if (grepl(x = ms, pattern = "sp1")) {
+            ms <- c(ms, NA)
+        } else{
+            ms <- c(NA, ms)
+        }
+        matrix(ms, ncol = 2)
+    })
+    ## write to res combinations where csize == 2
+    inds_2 <- which(comp$csize == 2)
+    res[inds_2] <- lapply(inds_2, function(a) matrix(names(which(comp$membership == a)), ncol = 2))
+    
+    ## get combinations where csize > 2
+    inds <- which(comp$csize > 2)
+    
+    for (i in inds) {
+        
+        ## separate component and create two matrices from x and 
+        ## y that only contain component menbers
+        names_ind_i <- names(which(comp$membership == i))
+        x_ind <- x[names_ind_i[names_ind_i %in% rownames(x)], ]
+        x_ind <- matrix(x_ind, ncol = 2,
+                        dimnames = list(names_ind_i[names_ind_i %in% rownames(x)], c("", "")))
+        y_ind <- y[names_ind_i[names_ind_i %in% rownames(y)], ]
+        y_ind <- matrix(y_ind, ncol = 2,
+                        dimnames = list(names_ind_i[names_ind_i %in% rownames(y)], c("", ""))) 
+        
+        ## allocate to c1 and c2 the names (colnames of x and y 
+        ## that are in the specific component)
+        if (nrow(x_ind) < nrow(y_ind)) {
+            c1 <- rownames(x_ind); c2 <- rownames(y_ind)
+            c1 <- c(c1, rep("NA", length(c2)-length(c1))) 
+            c1_c2 <- lapply(c1, function(a) expand.grid(a, c2))
+        } else {
+            c1 <- rownames(y_ind); c2 <- rownames(x_ind)
+            c1 <- c(c1, rep("NA", length(c2)-length(c1))) 
+            c1_c2 <- lapply(c2, function(a) expand.grid(a, c1))
+        }
+        
+        c1_c2_paste <- lapply(c1_c2, function(a) apply(a, 1, 
+            function(b) paste(b, collapse = " & ")))
+        
+        ## calculate all possible combinations
+        res_i <- as.matrix(expand.grid(c1_c2_paste, stringsAsFactors = FALSE))
+        ## write rows to list
+        res_i <- split(res_i, row(res_i))
+        ## strsplit " & ", unlist and write to matrix
+        res_i <- lapply(res_i, strsplit, split = " & ")
+        res_i <- lapply(res_i, unlist)
+        res_i <- matrix(unlist(res_i), nrow = length(res_i), byrow = TRUE)
+        
+        ## remove rows that contain duplicated values 
+        res_i <- res_i[!apply(apply(res_i, 1, duplicated), 2, any), ]
+        
+        ## filtering for crossing matching: retain order of m/z
+        seqs <- seq(2, ncol(res_i), by = 2)
+        if (ncol(res_i) > 2) {
+            ## do if there is only a multiple mapping:
+            ## check order of sp2s, they have to ascend, remove those that
+            ## do not ascend
+            crosses <- lapply(
+                as.data.frame(t(res_i[, seqs]), stringsAsFactors = FALSE),
+                function(a) as.numeric(substr(a, 5, nchar(a))))
+            ## check where all are TRUE, remove before NAs
+            crosses <- lapply(crosses, function(a) {
+                a <- a[!is.na(a)]
+                all(a == sort(a))
+            })
+            res_i <- matrix(res_i[unlist(crosses), ], ncol = ncol(res_i))
+            
+            ## create shifted matrices, do not use last element since it contains
+            ## only NAs
+            shift_right <- lapply(seqs[-length(seqs)], 
+                function(a) shiftMatrix(res_i, seqs, a / 2))
+            shift_left <- lapply(seqs[-length(seqs)], 
+                function(a) shiftMatrix(res_i, seqs, -a / 2))
+            
+            ## write to matrix
+            mat_shift_left <- lapply(shift_left, function(a) {
+                res_i[, seqs] <- a
+                return(res_i)
+            })
+            mat_shift_right <- lapply(shift_right, function(a) {
+                res_i[, seqs] <- a
+                return(res_i)
+            })
+            
+            ## rbind the lists
+            mat_shift_left <- do.call(rbind, mat_shift_left)
+            mat_shift_right <- do.call(rbind, mat_shift_right)
+            
+            ## the ones that are shifted out, link to NA and bind to NA
+            ## for mat_shift_left (take from shift_right and inverse)
+            add_left <- shift_right[length(shift_right):1]
+            add_left <- lapply(add_left, function(x) x[, -1])
+            
+            mat_add_left <- mat_add_right <- matrix("NA", 
+                ncol = length(shift_left) * 2, nrow = nrow(mat_shift_left))
+            mat_add_left[, seqs[-length(seqs)]] <- unlist(add_left)
+            
+            ## for mat_shift_right (take from shift_left and inverse)
+            add_right <- shift_left[length(shift_left):1]
+            add_right <- lapply(add_right, 
+                function(x) matrix(x[, ncol(x):1], ncol = ncol(x)))
+            add_right <- lapply(add_right, function(x) x[, -1])
+            mat_add_right[, seqs[-length(seqs)]] <- unlist(add_right)
+            
+            ## cbind with mat_add_left and mat_add_right
+            mat_shift_left <- cbind(mat_shift_left, mat_add_left)
+            mat_shift_right <- cbind(mat_shift_right, mat_add_right)
+            
+            ## add to res_i
+            res_i <- cbind(res_i, matrix("NA", nrow = nrow(res_i), ncol = ncol(mat_add_left)))
+            
+            ## assign to res
+            res[[i]] <- rbind(res_i, mat_shift_left, mat_shift_right)
+        } else {
+            res[[i]] <- res_i
+        }
+    }
+    
+    ## create combinations between rows
+    res_paste <- lapply(res, function(a) apply(a, 1, paste, collapse = " & "))
+    
+    ## 4) calculate all possible combinations between the components
+    res_exp <- as.matrix(expand.grid(res_paste, stringsAsFactors = FALSE))
+    ## write rows to list
+    res_exp <- split(res_exp, row(res_exp))
+    ## strsplit " & ", unlist and write to matrix 
+    res_exp <- lapply(res_exp, strsplit, split = " & ")
+    res_exp <- lapply(res_exp, unlist)
+    res_exp <- matrix(unlist(res_exp), nrow = length(res_exp), byrow = TRUE)
+    
+    ## 5) go through every row and calculate score: row with highest score is 
+    ## the best match
+    sim <- apply(res_exp, 1, function(a) {
+        sp1_ind <- a[seq(1, ncol(res_exp), 2)]
+        sp2_ind <- a[seq(2, ncol(res_exp), 2)]
+        
+        ## remove elements when two "NA" are at the same position 
+        ## (they were created artificially in the above step when pushing 
+        ## sp1/sp2 out and cbinding the pushed out from the matrix)
+        ind_remove <- sp1_ind == "NA" & sp2_ind == "NA"
+        sp1_ind <- sp1_ind[!ind_remove]
+        sp2_ind <- sp2_ind[!ind_remove]
+
+        ## create vectors that store mz and intensity of combination
+        mz1 <- rep(NA, length(sp1_ind))
+        int1 <- numeric(length(sp1_ind))
+        mz2 <- rep(NA, length(sp2_ind))
+        int2 <- numeric(length(sp2_ind))
+        
+        mz1[sp1_ind != "NA"] <- x[sp1_ind[sp1_ind != "NA"], "mz"]
+        mz2[sp2_ind != "NA"] <- y[sp2_ind[sp2_ind != "NA"], "mz"]
+        int1[sp1_ind != "NA"] <- x[sp1_ind[sp1_ind != "NA"], "intensity"]
+        int2[sp2_ind != "NA"] <- y[sp2_ind[sp2_ind != "NA"], "intensity"]
+        
+        ## create matrices of matched spectra    
+        sp1 <- matrix(c(mz1, int1), ncol = 2)
+        sp2 <- matrix(c(mz2, int2), ncol = 2)
+        colnames(sp1) <- colnames(sp2) <- c("mz", "intensity")
+        
+        ## get similarity value from function FUN
+        value <- FUN(sp1, sp2, ...)
+        
+        list(value = value, x = sp1, y = sp2)
+        
+    })
+    sim_value <- unlist(lapply(sim, "[[", "value"))
+    sim_max <- sim[[which.max(sim_value)]]
+    
+    # ## sort according to ascending order
+    x_max <- as.matrix(sim_max[["x"]])
+    y_max <- as.matrix(sim_max[["y"]])
+    
+    sort_x_y <- lapply(seq_len(nrow(x_max)), function(a)
+        paste(sort(c(x_max[a, "mz"], y_max[a, "mz"])), collapse = "_")
+    )
+    sort_x_y <- order(unlist(sort_x_y))
+    x_max <- x_max[sort_x_y, ]
+    y_max <- y_max[sort_x_y, ]
+    
+    list(x = x_max, y = y_max)
+    
+}
+
+#' @name shiftMatrix
+#' @title Shift columns of a matrix by n and set added columns to def
+#' @description `shiftMatrix` shifts columns of a matrix by `n` and 
+#' sets the added columns to `def`.
+#' @usage shiftMatrix(mat, x, n, def = NA)
+#' @param mat `matrix`
+#' @param x `numeric`, col indices to shift
+#' @param n `numeric(1)`, gives the number by how many positions the columns 
+#' `x` of `mat` are shifted
+#' @param def `character(1)`/`numeric(1)`, replacement value for added columns
+#' @details helper function for `graphPeaks`
+#' @return matrix with all combinations of shifted rows, only returns the 
+#' columns `x` of `mat`
+#' @author Thomas Naake \email{thomasnaake@@googlemail.com}
+#' @examples 
+#' mat <- matrix(letters[1:18], ncol = 6, nrow = 3)
+#' x <- c(2, 4, 6)
+#' shiftMatrix(mat = mat, x = x, n = 1, def = NA)
+#' @export
+shiftMatrix <- function(mat, x, n, def = NA){
+    if (n == 0) { res <- mat }
+    if (n < 0) {
+        n <- abs(n)
+        res <- mat[, x[ seq_len(length(x) - n) + n ]]
+        res <- cbind(matrix(res, nrow = nrow(mat), byrow = FALSE),
+                    matrix(def, nrow = nrow(mat), ncol = n))
+    } else {
+        res <- mat[, x[ seq_len(length(x) - n) ]]
+        res <- cbind(matrix(def, nrow = nrow(mat), ncol = n),
+                    matrix(res, nrow(mat), byrow = FALSE))
+    }
+    return(res)
+}
+
diff --git a/README.md b/README.md
index e44d0098..0957b381 100644
--- a/README.md
+++ b/README.md
@@ -18,4 +18,5 @@ Currently we externalising core functions from `MSnbase`, `MALDIquant`, etc.
 # Contributions
 
 - Sigurdur Smarason (@SiggiSmara): weighted moving average (https://github.com/sgibb/MALDIquant/pull/54)
+- Thomas Naake (@tnaake): dotproduct calculation (https://github.com/rformassspectrometry/MsCoreUtils/pull/17), graphPeaks function 
 
diff --git a/man/dotproduct.Rd b/man/dotproduct.Rd
new file mode 100644
index 00000000..f94e6a96
--- /dev/null
+++ b/man/dotproduct.Rd
@@ -0,0 +1,72 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dotproduct.R
+\name{dotproduct}
+\alias{dotproduct}
+\title{Calculate the normalized dot product}
+\usage{
+dotproduct(x, y, m = 0.5, n = 0)
+}
+\arguments{
+\item{x}{\code{matrix} with two column where one contains m/z values (column \code{"mz"}) and
+the second corresponding intensity values (column \code{"intensity"})}
+
+\item{y}{\code{matrix} with two column where one contains m/z values (column \code{"mz"}) and
+the second corresponding intensity values (column \code{"intensity"})}
+
+\item{m}{\code{numeric(1)}, exponent for peak intensity-based weights}
+
+\item{n}{\code{numeric(1)}, exponent for m/z-based weights}
+}
+\value{
+\code{numeric(1)}, \code{dotproduct} returns a numeric similarity coefficient between
+0 and 1.
+}
+\description{
+Calculate the normalized dot product (NDP).\code{dotproduct} returns a numeric
+value ranging between 0 and 1, where 0 indicates no similarity between the
+two MS/MS features, while 1 indicates that the MS/MS features are identical.
+}
+\details{
+Each row in \code{x} corresponds to the respective row in \code{y}, i.e. the peaks
+(entries \code{"mz"}) per spectrum have to match.
+
+\code{m} and \code{n} are weights given on the peak intensity and the m/z values
+respectively. As default (\code{m = 0.5}), the square root of the intensity
+values are taken to calculate weights. With increasing values for \code{m}, high
+intensity values become more important for the similarity calculation,
+i.e. the differences between intensities will be aggravated.
+With increasing values for \code{n}, high m/z values will be taken more into
+account for similarity calculation. Especially when working with small
+molecules, a value \code{n > 0} can be set, to give a weight on the m/z values to
+accommodate that shared fragments with higher m/z are less likely and will
+mean that molecules might be more similar. If \code{n != 0}, a warning will be
+raised if the corresponding m/z values are not identical, since small
+differences in m/z values will distort the similarity values with increasing
+\code{n}. If \code{m=0} or \code{n=0}, intensity values or m/z values, respectively, are not
+taken into account.
+
+The normalized dot product is calculated according to:
+\deqn{NDP = \frac{\sum(W_{S1, i} \cdot W_{S2, i}) ^ 2}{ \sum(W_{S1, i} ^ 2) * \sum(W_{S2, i} ^ 2) }}{\sum(W_{S1, i} \cdot W_{S2, i}) ^ 2 \sum(W_{S1, i} ^ 2) * \sum(W_{S2, i} ^ 2)},
+with \eqn{W = [ peak intensity] ^{m} \cdot [m/z]^n}.
+For further information on normalized dot product see for example
+Li et al. (2015).
+Prior to calculating \deqn{W_{S1}} or \deqn{W_{S2}}, all intensity values
+are divided by the maximum intensity value and multiplied by 100.
+}
+\examples{
+x <- matrix(c(c(100.002, 100.001, NA, 300.01, 300.02, NA), 
+        c(2, 1.5, 0, 1.2, 0.9, 0)), ncol = 2,)
+y <- matrix(c(c(100.0, NA, 200.0, 300.002, 300.025, 300.0255),
+        c(2, 0, 3, 1, 4, 0.4)), ncol = 2)       
+colnames(x) <- colnames(y) <- c("mz", "intensity")
+dotproduct(x, y, m = 0.5, n = 0) 
+
+}
+\references{
+Li et al. (2015): Navigating natural variation in herbivory-induced
+secondary metabolism in coyote tobacco populations using MS/MS structural
+analysis. PNAS, E4147--E4155, DOI: 10.1073/pnas.1503106112.
+}
+\author{
+Thomas Naake, \email{thomasnaake@googlemail.com}
+}
diff --git a/man/graphPeaks.Rd b/man/graphPeaks.Rd
new file mode 100644
index 00000000..3e5e254b
--- /dev/null
+++ b/man/graphPeaks.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/graphPeaks.R
+\name{graphPeaks}
+\alias{graphPeaks}
+\title{Match two spectra using bipartite networks and combinatorics}
+\usage{
+graphPeaks(x, y, ppm = 20, FUN = dotproduct, ...)
+}
+\arguments{
+\item{x}{\code{matrix}, the first column (\code{"mz"}) contains m/z value and the
+second column (\code{"intensity"}) contains the corresponding intensity values}
+
+\item{y}{\code{matrix}, the first column (\code{"mz"}) contains m/z value and the
+second column (\code{"intensity"}) contains the corresponding intensity values}
+
+\item{ppm}{\code{numeric}, tolerance parameter in ppm to match corresponding peaks}
+
+\item{FUN}{function to calculate similarity between spectra}
+
+\item{...}{additional parameters passed to \code{FUN}}
+}
+\value{
+list with elements \code{x} and \code{y} each being a matrix with columns \code{"mz"}
+and \code{"intensity"}. Each row (peak) in \code{x} matches the row (peak) in \code{y}
+}
+\description{
+Function to match spectrum objects using bipartite networks and combinations
+\code{y} as input that contain spectral information. The matching
+is a multi-step procedure:
+\enumerate{
+\item filtering based on \code{ppm},
+\item retain order of matches between features of \code{x} and
+\code{y} (remove crossing edges that violate the order of matching
+m/z),
+\item calculate all combinations of the remaining possibilities.
+}
+}
+\details{
+Objective function is highest similarites between the two
+spectral objects, i.e. \code{FUN} is calculated over all combinations and
+the similarity of the combination that yields the highest similarity is
+returned.
+}
+\examples{
+x <- matrix(c(c(100.001, 100.002, 300.01, 300.02),
+    c(1, 1, 1, 1)), ncol = 2, nrow = 4, byrow = FALSE)
+colnames(x) <- c("mz", "intensity")
+
+y <- matrix(c(c(100.0, 200.0, 300.002, 300.025, 300.0255),
+    c(1, 1, 1, 1, 1)), ncol = 2, nrow = 5, byrow = FALSE)
+colnames(y) <- c("mz", "intensity")
+graphPeaks(x = x, y = y, ppm = 20, FUN = dotproduct)
+}
+\author{
+Thomas Naake \email{thomasnaake@googlemail.com}
+}
diff --git a/man/shiftMatrix.Rd b/man/shiftMatrix.Rd
new file mode 100644
index 00000000..374f3a70
--- /dev/null
+++ b/man/shiftMatrix.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/graphPeaks.R
+\name{shiftMatrix}
+\alias{shiftMatrix}
+\title{Shift columns of a matrix by n and set added columns to def}
+\usage{
+shiftMatrix(mat, x, n, def = NA)
+}
+\arguments{
+\item{mat}{\code{matrix}}
+
+\item{x}{\code{numeric}, col indices to shift}
+
+\item{n}{\code{numeric(1)}, gives the number by how many positions the columns
+\code{x} of \code{mat} are shifted}
+
+\item{def}{\code{character(1)}/\code{numeric(1)}, replacement value for added columns}
+}
+\value{
+matrix with all combinations of shifted rows, only returns the
+columns \code{x} of \code{mat}
+}
+\description{
+\code{shiftMatrix} shifts columns of a matrix by \code{n} and
+sets the added columns to \code{def}.
+}
+\details{
+helper function for \code{graphPeaks}
+}
+\examples{
+mat <- matrix(letters[1:18], ncol = 6, nrow = 3)
+x <- c(2, 4, 6)
+shiftMatrix(mat = mat, x = x, n = 1, def = NA)
+}
+\author{
+Thomas Naake \email{thomasnaake@googlemail.com}
+}
diff --git a/tests/testthat/test_dotproduct.R b/tests/testthat/test_dotproduct.R
new file mode 100644
index 00000000..d6761710
--- /dev/null
+++ b/tests/testthat/test_dotproduct.R
@@ -0,0 +1,75 @@
+test_that("dotproduct", {
+    
+    ## create two dummy spectra (aligned)
+    x <- matrix(c(c(100.002, 100.001, NA, 300.01, 300.02, NA), 
+             c(2, 1.5, 0, 1.2, 0.9, 0)), ncol = 2)
+    y <- matrix(c(c(100.0, NA, 200.0, 300.002, 300.025, 300.0255),
+             intensity = c(2, 0, 3, 1, 4, 0.4)), ncol = 2)
+    colnames(x) <- colnames(y) <- c("mz", "intensity")
+    
+    ## calculate similarity values
+    expect_warning(dotproduct(x, y, m = 0, n = 2), "not identical")
+    expect_warning(dotproduct(x, y, n = 2), "not identical")
+    expect_equal(dotproduct(x, y, m = 0.5, n = 0), 0.4280249, tolerance = 1e-06)
+    expect_equal(dotproduct(x, y, m = 0, n = 0), 1)
+    expect_equal(dotproduct(x, y, n = 0), 0.4280249, tolerance = 1e-06)
+    
+    
+    ## if the identical spectra are passed, dotproduct has to return 1
+    expect_equal(dotproduct(x, x, n = 2), 1)
+    expect_equal(dotproduct(y, y, n = 2), 1)
+    
+    ## check exceptions
+    expect_error(dotproduct(x[-1, ], y), "nrow[(]x[)] and nrow[(]y[)]")
+    expect_error(dotproduct(x, y[-1, ]), "nrow[(]x[)] and nrow[(]y[)]")
+    expect_error(dotproduct(x = x), "is missing, with no default")
+    expect_error(dotproduct(y = y), "is missing, with no default")
+    expect_error(dotproduct(x = x, y = "a"), "not a matrix")
+    expect_error(dotproduct(x = "a", y = y), "not a matrix")
+    expect_error(dotproduct(x, y, m = "a", n = 0), 
+                 "has to be a numeric of length 1")
+    expect_error(dotproduct(x, y, m = 0.5, n = "a"), 
+                 "has to be a numeric of length 1")
+    expect_error(dotproduct(x, y, m = 1:2, n = 0),
+                 "has to be a numeric of length 1")
+    expect_error(dotproduct(x, y, m = 0.5, n = 1:2),
+                 "has to be a numeric of length 1")
+    
+    ## check for columns 'mz' and 'intensity'
+    x <- matrix(c(1:3, 1:3), ncol = 2)
+    y <- matrix(c(1:3, 1:3), ncol = 2)
+    colnames(x) <- c("foo", "intensity")
+    colnames(y) <- c("mz", "intensity")
+    expect_error(dotproduct(x, y), "subscript out of bounds")
+    colnames(x) <- c("mz", "foo")
+    colnames(y) <- c("mz", "intensity")
+    expect_error(dotproduct(x, y), "subscript out of bounds")
+    colnames(x) <- c("mz", "intensity")
+    colnames(y) <- c("foo", "intensity")
+    expect_error(dotproduct(x, y), "subscript out of bounds")
+    colnames(x) <- c("mz", "intensity")
+    colnames(y) <- c("mz", "foo")
+    expect_error(dotproduct(x, y), "subscript out of bounds")
+    
+    ## test for matrix
+    x <- list(mz = 1:3, intensity = 1:3)
+    y <- matrix(c(1:3, 1:3), ncol = 2)
+    colnames(y) <- c("mz", "intensity")
+    expect_error(dotproduct(x, y), "is not a matrix")
+    
+    x <- matrix(c(1:3, 1:3), ncol = 2)
+    colnames(x) <- c("mz", "intensity")
+    y <- list(mz = 1:3, intensity = 1:3)
+    expect_error(dotproduct(x, y), "is not a matrix")
+    
+    ## test for mode 'numeric'
+    x <- matrix(c(c("a", "b", "c"), c(1:3)), ncol = 2)
+    y <- matrix(c(1:3, 1:3), ncol = 2)
+    colnames(x) <- colnames(y) <- c("mz", "intensity")
+    expect_error(dotproduct(x, y), "non-numeric argument to binary operator")
+    x <- matrix(c(1:3, 1:3), ncol = 2) 
+    y <- matrix(c(c("a", "b", "c"), c(1:3)), ncol = 2)
+    colnames(x) <- colnames(y) <- c("mz", "intensity")
+    expect_error(dotproduct(x, y), "non-numeric argument to binary operator")
+    
+})
\ No newline at end of file
diff --git a/tests/testthat/test_graphPeaks.R b/tests/testthat/test_graphPeaks.R
new file mode 100644
index 00000000..c482f50f
--- /dev/null
+++ b/tests/testthat/test_graphPeaks.R
@@ -0,0 +1,85 @@
+## function shiftMatrix
+library("testthat")
+
+test_that("shiftMatrix", {
+    
+    ## create test matrices
+    mat_l <- matrix(letters[1:18], ncol = 6, nrow = 3)
+    ## n: negative, p: positive
+    mat_n1 <- matrix(c("j", "p", NA, "k", "q", NA, "l", "r", NA), 
+                     ncol = 3, nrow = 3, byrow = TRUE)
+    mat_n2 <- matrix(c("p", NA, NA, "q", NA, NA, "r", NA, NA), 
+                     ncol = 3, nrow = 3, byrow = TRUE)
+    mat_p1 <- matrix(c(NA, "d", "j", NA, "e", "k", NA, "f", "l"), 
+                     ncol = 3, nrow = 3, byrow = TRUE)
+    mat_p2 <- matrix(c(NA, NA, "d", NA, NA, "e", NA, NA, "f"), 
+                     ncol = 3, nrow = 3, byrow = TRUE)
+    
+    expect_equal(shiftMatrix(mat_l, x = c(2, 4, 6), n = -1), mat_n1)
+    expect_equal(shiftMatrix(mat_l, x = c(2, 4, 6), n = -2), mat_n2)
+    expect_equal(shiftMatrix(mat_l, x = c(2, 4, 6), n = 1), mat_p1)
+    expect_equal(shiftMatrix(mat_l, x = c(2, 4, 6), n = 2), mat_p2)
+    expect_equal(shiftMatrix(mat_l, x = c(2, 4, 6), n = 0), mat_l[, c(2, 4, 6)])
+    expect_error(shiftMatrix(x = c(2, 4, 6), n = 1, def = NA), 
+        "is missing, with no default")
+    expect_error(shiftMatrix(mat = mat_l, n = 1, def = NA),
+        "is missing, with no default")
+    expect_error(shiftMatrix(mat = mat_l, x = c(2, 4, 6), def = NA),
+        "is missing, with no default")
+    expect_error(shiftMatrix(mat = mat_l, x = c(2, 4, 6, 8, 10), n = 1, def = NA),
+        "subscript out of bounds")
+})
+
+
+## function graphPeaks
+library("testthat")
+
+## create example x and y and perform tests
+x <- matrix(c(c(100.001, 100.002, 300.01, 300.02),
+              c(1, 1, 1, 1)), ncol = 2, nrow = 4, byrow = FALSE)
+colnames(x) <- c("mz", "intensity")
+
+y <- matrix(c(c(100.0, 200.0, 300.002, 300.025, 300.0255),
+              c(1, 1, 1, 1, 1)), ncol = 2, nrow = 5, byrow = FALSE)
+colnames(y) <- c("mz", "intensity")
+
+## create matrices that contain the result
+x_match <- matrix(c(c(100.002, 100.001, NA, 300.01,  300.02, NA), 
+                    c(1, 1, 0, 1, 1, 0)), ncol = 2, nrow = 6)
+colnames(x_match) <- c("mz", "intensity")
+
+y_match <- matrix(c(c(100.0, NA, 200.0, 300.002, 300.025, 300.0255),
+                    c(1, 0, 1, 1, 1, 1)), ncol = 2, nrow = 6)
+colnames(y_match) <- c("mz", "intensity")
+
+test_that("graphPeaks", {
+    ## tests
+    expect_equal(graphPeaks(x = x, y = x), list(x = x, y = x),
+        tolerance = 1e-05)
+    expect_equal(graphPeaks(x = y, y = y), list(x = y, y = y), 
+        tolerance = 1e-05)
+    expect_equal(graphPeaks(x = x, y = y, m = 0.5, n = 0)$x, x_match, tolerance = 1e-05)
+    expect_equal(graphPeaks(x = x, y = y, m = 0.5, n = 0)$y, y_match, tolerance = 1e-05)
+    expect_true(is.list(graphPeaks(x = x, y = y)))
+    expect_error(graphPeaks(x = x[1, ], y = y), "is not a matrix")
+    expect_error(graphPeaks(x = x, y = y[[1,]]), "subscript out of bounds")
+    expect_error(graphPeaks(x = x), "is missing, with no default")
+    expect_error(graphPeaks(y = y), "is missing, with no default")
+    expect_error(graphPeaks(x = x, y = y, FUN = max), "attempt to select less")
+    
+    ## ppm
+    expect_error(graphPeaks(x = x, y = y, ppm = "a"), "is not numeric")
+    expect_error(graphPeaks(x = x, y = y, ppm = -1), "has to be positive")
+    
+    ## test for mode
+    x_chr <- matrix(c("a", "b", "c", "d"), ncol = 2)
+    expect_error(graphPeaks(x = x_chr, y = y), "is not \'numeric\'")
+    expect_error(graphPeaks(x = x, y = x_chr), "is not \'numeric\'")
+    
+    ## test for colnames 
+    x_colnames <- matrix(c(c(100.001, 100.002, 300.01, 300.02),
+                  c(1, 1, 1, 1)), ncol = 2, nrow = 4, byrow = FALSE)
+    expect_error(graphPeaks(x = x_colnames, y = y), "subscript out of bounds")
+    expect_error(graphPeaks(x = y, y = x_colnames), "subscript out of bounds")
+
+})