- Implement the plug-in Bayesian algorithm
- Generate a plot as a working demo of this method
- Consider the cases where the borderline is a parabola, an ellipse, or a hyperbola
The essence of the plug-in algorithm is to build estimates of the expectation vector and the covariance matrix and then "plug" them into the likelihood function of the optimal Bayesian classifier, hence the name. The estimates themselves are computed using the following formulas:
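Expectation estimate for a given class, essentially the mean of its feature vectors:

$$\hat{\mu} = \frac{1}{m} \sum_{i=1}^{m} x_i$$

Covariance matrix estimate:

$$\hat{\Sigma} = \frac{1}{m - 1} \sum_{i=1}^{m} (x_i - \hat{\mu})(x_i - \hat{\mu})^T$$

Here $m$ is the number of training objects (feature vectors) of the class, $x_i$ is the $i$-th feature vector, and $^T$ denotes matrix transposition.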
Normal distributions | Parabola-shaped borderline |
Ellipse-shaped borderline | Hyperbola-shaped borderline |
# Estimate class prior probabilities as relative frequencies.
#
# dset:   data set with one row per training object.
# column: name or index of the column that holds the class labels.
#
# Returns the per-class counts produced by table(), divided by the total
# number of rows in `dset`.
getPriorProbabilities <- function (dset, column) {
  classCounts <- table(dset[, column])
  totalRows <- nrow(dset)
  classCounts / totalRows
}
# Unbiased sample covariance matrix of the feature columns of `dset`.
#
# dset:        data frame or numeric matrix with one row per object. Every
#              column except the last is treated as a feature; the last
#              column is skipped (callers in this file pass the class label
#              there).
# expectation: feature means, as a 1 x p matrix or a length-p vector, where
#              p = ncol(dset) - 1.
#
# Returns the p x p matrix  sum_i (x_i - mu)^T (x_i - mu) / (m - 1),
# where m is the number of rows. Stops with an error when m < 2, because
# the (m - 1) divisor would otherwise yield a NaN matrix.
getCovMatrix <- function(dset, expectation) {
  featureCount <- ncol(dset) - 1L
  objectCount <- nrow(dset)
  if (objectCount < 2L) {
    stop("getCovMatrix needs at least two rows to estimate a covariance matrix",
         call. = FALSE)
  }
  # seq_len() selects columns 1..p explicitly; `1:n - 1` parses as
  # `(1:n) - 1` and only works because a zero index is silently dropped.
  # drop = FALSE keeps a 2-d object so matrix input is handled too.
  features <- as.matrix(dset[, seq_len(featureCount), drop = FALSE])
  # Subtract the mean of each column, then form the sum of outer products
  # in one step: t(centered) %*% centered.
  centered <- sweep(features, 2L, as.numeric(expectation))
  crossprod(centered) / (objectCount - 1L)
}
# Multivariate normal density evaluated at `point`.
#
# point:       numeric feature vector of the object being classified.
# expectation: mean vector of the distribution (same length as `point`).
# covMatrix:   square covariance matrix of the distribution.
#
# Returns the density as a 1 x 1 matrix:
#   exp(-(x - mu) S^-1 (x - mu)^T / 2) / sqrt((2 pi)^n det(S))
getLikelihood <- function(point, expectation, covMatrix) {
  # Row vector (1 x n) of deviations from the mean.
  deviation <- t(as.matrix(point - expectation))
  precision <- solve(covMatrix)
  quadraticForm <- deviation %*% precision %*% t(deviation)
  dimension <- nrow(covMatrix)
  normalizer <- sqrt((2 * pi) ^ dimension * det(covMatrix))
  exp(-quadraticForm / 2) / normalizer
}
# Score of one class for `point` under the plug-in Bayesian rule:
#   lambda * prior(class) * likelihood(point | class)
# where the likelihood is a normal density whose mean vector and covariance
# matrix are estimated from the rows of `dset` that belong to the class.
#
# dset:          training data; the first length(point) columns are read as
#                features.
# classesColumn: name or index of the column holding the class labels.
# className:     label of the class being scored.
# point:         numeric feature vector of the object being classified.
# lambda:        multiplier applied to the class score.
#
# Returns the score in whatever shape getLikelihood() produces.
getPlugInProbability <- function(dset, classesColumn, className, point, lambda) {
  featuresCount <- length(point)
  classRows <- which(dset[, classesColumn] == className)
  croppedDset <- dset[classRows, ]

  # Look the prior up by label. Indexing with the raw value would treat a
  # numeric label (e.g. 0 or 2) as a position rather than a name.
  priors <- getPriorProbabilities(dset, classesColumn)
  prior <- priors[as.character(className)]

  # Per-feature means of the class; names are dropped so downstream matrix
  # arithmetic sees a plain numeric vector.
  featureColumns <- croppedDset[, seq_len(featuresCount), drop = FALSE]
  expectations <- unname(colMeans(featureColumns))

  covMatrix <- getCovMatrix(croppedDset, matrix(expectations, nrow = 1))
  likelihood <- getLikelihood(point, expectations, covMatrix)
  lambda * prior * likelihood
}
# Classify `point` with the plug-in Bayesian rule: score every class found
# in `dset` and return the label with the highest score.
#
# point: numeric feature vector of the object being classified.
# dset:  training data with the labels in a column named "class".
#
# NOTE(review): the per-class multipliers are read from a variable `lambdas`
# that is not defined in this function; it must exist in an enclosing
# environment and is indexed in the order of unique(dset[, "class"]).
plugInClassifier <- function(point, dset) {
  # Use the column name in both places; the label column used to be read by
  # position (3) here but by name ('class') when scoring.
  classesColumn <- "class"
  classes <- unique(dset[, classesColumn])
  scores <- numeric(length(classes))
  for (i in seq_along(classes)) {
    scores[i] <- getPlugInProbability(dset, classesColumn, classes[i], point, lambdas[i])
  }
  classes[which.max(scores)]
}
The app is live on shinyapps.io.