# 分类器评价与在R中的实现：混淆矩阵

Posted by c cm on January 18, 2014

# 一、混淆矩阵简介

Confusion Matrix                 Target
                     Positive             Negative
Model  Positive      TP (True Positive)   FP (False Positive)
       Negative      FN (False Negative)  TN (True Negative)
                     P (Positive Sample)  N (Negative Sample)

Accuracy = (TP+TN)/(P+N)
Error Rate = 1 - Accuracy = (FP+FN)/(P+N)
False Positive Rate = Fallout = FP/N
True Positive Rate = Recall = Sensitivity = TP/P
False Negative Rate = Miss = FN/P
True Negative Rate = Specificity = TN/N
Positive Predictive Value = Precision = TP/(TP+FP)
Negative Predictive Value = TN/(TN+FN)
Prediction-conditioned Fallout = FP/(TP+FP)
Prediction-conditioned Miss = FN/(TN+FN)
Rate of Positive Predictions = Detection Prevalence = (TP+FP)/(P+N)
Rate of Negative Predictions = (TN+FN)/(P+N)
Prevalence = (TP+FN)/(P+N)
Detection Rate = TP/(P+N)
Balanced Accuracy = (Sensitivity+Specificity)/2
MCC(Matthews correlation coefficient) = (TP*TN - FP*FN)/[(TP+FP)(TP+FN)(TN+FP)(TN+FN)]^(1/2)

# 二、在R中计算混淆矩阵

# Load the ROCR package and its bundled example data set:
# 200 prediction scores with matching 0/1 true class labels.
# library() errors immediately if the package is missing,
# whereas require() only returns FALSE — prefer library() for loading.
library(ROCR)
data(ROCR.simple)
str(ROCR.simple)

## List of 2
##  $ predictions: num [1:200] 0.613 0.364 0.432 0.14 0.385 ...
##  $ labels     : num [1:200] 1 1 0 0 0 1 1 1 1 0 ...


## 1 用table()直接计算

# Threshold the prediction scores at 0.5 to get hard 0/1 class
# predictions, then cross-tabulate them against the true labels.
# (The original had these two statements fused onto one line,
# which is a syntax error in R — they must be separate statements.)
pred.class <- as.integer(ROCR.simple$predictions > 0.5)
print(cft <- table(pred.class, ROCR.simple$labels))

##
## pred.class  0  1
##          0 91 14
##          1 16 79


# Pull the four cell counts out of the confusion table.
# Rows of cft are predictions (0, 1); columns are true labels (0, 1),
# so cft[2, 2] is "predicted 1 and truly 1" (TP), and so on.
tn <- cft[1, 1]
fn <- cft[1, 2]
fp <- cft[2, 1]
tp <- cft[2, 2]
# Accuracy: overall fraction of correct predictions.
print(accuracy <- (tp + tn)/(tp + tn + fp + fn))

## [1] 0.85

# Sensitivity (recall / true positive rate): TP / P.
print(sensitivity <- tp/(tp + fn))

## [1] 0.8495

# Specificity (true negative rate): TN / N.
print(specificity <- tn/(tn + fp))

## [1] 0.8505


## 2 用confusionMatrix()算

# caret's confusionMatrix() computes the whole battery of statistics
# in one call; positive = "1" declares class "1" as the positive class.
# Use library() rather than require() for loading (errors on failure).
library(caret)
confusionMatrix(cft, positive = "1")

# Equivalent call from the raw vectors. Recent versions of caret
# require factor inputs (numeric vectors raise an error), so convert
# explicitly. The original also had a garbled "\$" where "$" belongs.
confusionMatrix(factor(pred.class), factor(ROCR.simple$labels),
                positive = "1")

## Confusion Matrix and Statistics
##
##           Reference
## Prediction  0  1
##          0 91 14
##          1 16 79
##
##                Accuracy : 0.85
##                  95% CI : (0.793, 0.896)
##     No Information Rate : 0.535
##     P-Value [Acc > NIR] : <2e-16
##
##                   Kappa : 0.699
##  Mcnemar's Test P-Value : 0.855
##
##             Sensitivity : 0.849
##             Specificity : 0.850
##          Pos Pred Value : 0.832
##          Neg Pred Value : 0.867
##              Prevalence : 0.465
##          Detection Rate : 0.395
##    Detection Prevalence : 0.475
##       Balanced Accuracy : 0.850
##
##        'Positive' Class : 1
##