##### Beginning of file
# This file was generated by PredictMD version 0.19.0
# For help, please visit https://www.predictmd.net
import PredictMD
### Begin project-specific settings
# Guard against running under an incompatible Julia / PredictMD combination.
PredictMD.require_julia_version("v0.6")
PredictMD.require_predictmd_version("0.19.0")
# PredictMD.require_predictmd_version("0.19.0", "0.20.0-")
# All intermediate artifacts (CSVs, saved models, plots) live under this directory.
PROJECT_OUTPUT_DIRECTORY = PredictMD.project_directory(homedir(), "Desktop", "breast_cancer_biopsy_example")
### End project-specific settings
### Begin model comparison code
import CSV
import Compat
import DataFrames
import FileIO
import JLD2
import Knet
srand(999)
# Paths to the CSV files written by the earlier data-preparation step,
# one features/labels pair per data split.
trainingandvalidation_features_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "trainingandvalidation_features_df.csv")
trainingandvalidation_labels_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "trainingandvalidation_labels_df.csv")
testing_features_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "testing_features_df.csv")
testing_labels_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "testing_labels_df.csv")
training_features_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "training_features_df.csv")
training_labels_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "training_labels_df.csv")
validation_features_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "validation_features_df.csv")
validation_labels_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "validation_labels_df.csv")
# Reload each data split from disk as a DataFrame. `rows_for_type_detect`
# controls how many rows CSV.jl inspects when inferring column types.
trainingandvalidation_features_df = CSV.read(trainingandvalidation_features_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
trainingandvalidation_labels_df = CSV.read(trainingandvalidation_labels_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
testing_features_df = CSV.read(testing_features_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
testing_labels_df = CSV.read(testing_labels_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
training_features_df = CSV.read(training_features_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
training_labels_df = CSV.read(training_labels_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
validation_features_df = CSV.read(validation_features_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
validation_labels_df = CSV.read(validation_labels_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
# SMOTE-oversampled training data (written earlier to rebalance the classes);
# presumably used by the models trained on the resampled set — confirm upstream.
smoted_training_features_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "smoted_training_features_df.csv")
smoted_training_labels_df_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "smoted_training_labels_df.csv")
smoted_training_features_df = CSV.read(smoted_training_features_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
smoted_training_labels_df = CSV.read(smoted_training_labels_df_filename, DataFrames.DataFrame; rows_for_type_detect = 100)
# Locations of the five serialized classifiers saved by the training scripts.
logistic_classifier_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "logistic_classifier.jld2")
random_forest_classifier_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "random_forest_classifier.jld2")
c_svc_svm_classifier_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "c_svc_svm_classifier.jld2")
nu_svc_svm_classifier_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "nu_svc_svm_classifier.jld2")
knet_mlp_classifier_filename = joinpath(PROJECT_OUTPUT_DIRECTORY, "knet_mlp_classifier.jld2")
# Deserialize each trained classifier from its JLD2 file.
logistic_classifier = PredictMD.load_model(logistic_classifier_filename)
random_forest_classifier = PredictMD.load_model(random_forest_classifier_filename)
c_svc_svm_classifier = PredictMD.load_model(c_svc_svm_classifier_filename)
nu_svc_svm_classifier = PredictMD.load_model(nu_svc_svm_classifier_filename)
knet_mlp_classifier = PredictMD.load_model(knet_mlp_classifier_filename)
# The Knet MLP's functions are stored as expressions in the JLD2 file;
# re-parse them after loading so the model is callable again.
PredictMD.parse_functions!(knet_mlp_classifier)
# Gather every classifier so each metrics/plot call below compares them
# side by side in one table or figure.
all_models = PredictMD.Fittable[
    logistic_classifier,
    random_forest_classifier,
    c_svc_svm_classifier,
    nu_svc_svm_classifier,
    knet_mlp_classifier,
    ]
# Binary classification target: the :Class column, "malignant" treated as positive.
single_label_name = :Class
negative_class = "benign"
positive_class = "malignant"
single_label_levels = [negative_class, positive_class]
# This example has one categorical label and no continuous labels.
categorical_label_names = Symbol[single_label_name]
continuous_label_names = Symbol[]
label_names = vcat(categorical_label_names, continuous_label_names)
# For the training set and then the testing set, print a comparison table of
# all models at four operating points: threshold fixed at 0.95 sensitivity,
# threshold fixed at 0.95 specificity, threshold maximizing F1 score, and
# threshold maximizing Cohen's kappa. Loop order matches the original
# eight sequential calls (all four training reports, then all four testing).
for (features_df, labels_df) in (
        (training_features_df, training_labels_df),
        (testing_features_df, testing_labels_df),
        )
    Compat.@info(PredictMD.singlelabelbinaryclassificationmetrics(
        all_models, features_df, labels_df, single_label_name, positive_class;
        sensitivity = 0.95,
        ))
    Compat.@info(PredictMD.singlelabelbinaryclassificationmetrics(
        all_models, features_df, labels_df, single_label_name, positive_class;
        specificity = 0.95,
        ))
    Compat.@info(PredictMD.singlelabelbinaryclassificationmetrics(
        all_models, features_df, labels_df, single_label_name, positive_class;
        maximize = :f1score,
        ))
    Compat.@info(PredictMD.singlelabelbinaryclassificationmetrics(
        all_models, features_df, labels_df, single_label_name, positive_class;
        maximize = :cohen_kappa,
        ))
end
# Draw ROC and precision-recall curves for all models on the held-out
# testing set, opening each figure as it is produced.
rocplottesting = PredictMD.plotroccurves(all_models, testing_features_df, testing_labels_df, single_label_name, positive_class)
PredictMD.open_plot(rocplottesting)
prplottesting = PredictMD.plotprcurves(all_models, testing_features_df, testing_labels_df, single_label_name, positive_class)
PredictMD.open_plot(prplottesting)
### End model comparison code
##### End of file
Info: Attempting to load model...
Info: Loaded model from file "/tmp/tmpxV7LRJ/PREDICTMDTEMPDIRECTORY/logistic_classifier.jld2"
Info: Attempting to load model...
Info: Loaded model from file "/tmp/tmpxV7LRJ/PREDICTMDTEMPDIRECTORY/random_forest_classifier.jld2"
Info: Attempting to load model...
Info: Loaded model from file "/tmp/tmpxV7LRJ/PREDICTMDTEMPDIRECTORY/c_svc_svm_classifier.jld2"
Info: Attempting to load model...
Info: Loaded model from file "/tmp/tmpxV7LRJ/PREDICTMDTEMPDIRECTORY/nu_svc_svm_classifier.jld2"
Info: Attempting to load model...
Info: Loaded model from file "/tmp/tmpxV7LRJ/PREDICTMDTEMPDIRECTORY/knet_mlp_classifier.jld2"
WARNING: Method definition knetmlp_predict(Any, AbstractArray{T, N} where N where T) in module PredictMD at none:5 overwritten at none:6.
WARNING: Method definition #knetmlp_predict(Array{Any, 1}, typeof(PredictMD.knetmlp_predict), Any, AbstractArray{T, N} where N where T) in module PredictMD overwritten.
WARNING: Method definition knetmlp_loss(Function, Any, AbstractArray{T, N} where N where T, AbstractArray{T, N} where N where T) in module PredictMD at none:9 overwritten at none:9.
WARNING: Method definition #knetmlp_loss(Array{Any, 1}, typeof(PredictMD.knetmlp_loss), Function, Any, AbstractArray{T, N} where N where T, AbstractArray{T, N} where N where T) in module PredictMD overwritten.
Info: 12×6 DataFrames.DataFrame. Omitted printing of 4 columns
│ Row │ metric │ Logistic regression │
├─────┼──────────────────────────────────────────────────┼─────────────────────┤
│ 1 │ AUPRC │ 0.990361 │
│ 2 │ AUROCC │ 0.995607 │
│ 3 │ Average precision │ 0.990415 │
│ 4 │ * Threshold │ 0.672809 │
│ 5 │ * Accuracy │ 0.967742 │
│ 6 │ * Cohen's Kappa statistic │ 0.912352 │
│ 7 │ * F1 Score │ 0.956175 │
│ 8 │ * Precision (positive predictive value) │ 0.96 │
│ 9 │ * Negative predictive value │ 0.972222 │
│ 10 │ * Recall (sensitivity, true positive rate) │ 0.952381 │
│ 11 │ [fix] * Sensitivity (recall, true positive rate) │ 0.952381 │
│ 12 │ * Specificity (true negative rate) │ 0.976744 │
Info: 12×6 DataFrames.DataFrame. Omitted printing of 4 columns
│ Row │ metric │ Logistic regression │
├─────┼────────────────────────────────────────────┼─────────────────────┤
│ 1 │ AUPRC │ 0.990361 │
│ 2 │ AUROCC │ 0.995607 │
│ 3 │ Average precision │ 0.990415 │
│ 4 │ * Threshold │ 0.254932 │
│ 5 │ * Accuracy │ 0.970674 │
│ 6 │ * Cohen's Kappa statistic │ 0.923775 │
│ 7 │ * F1 Score │ 0.961832 │
│ 8 │ * Precision (positive predictive value) │ 0.926471 │
│ 9 │ * Negative predictive value │ 1.0 │
│ 10 │ * Recall (sensitivity, true positive rate) │ 1.0 │
│ 11 │ * Sensitivity (recall, true positive rate) │ 1.0 │
│ 12 │ [fix] * Specificity (true negative rate) │ 0.953488 │
Info: 12×6 DataFrames.DataFrame. Omitted printing of 4 columns
│ Row │ metric │ Logistic regression │
├─────┼────────────────────────────────────────────┼─────────────────────┤
│ 1 │ AUPRC │ 0.990361 │
│ 2 │ AUROCC │ 0.995607 │
│ 3 │ Average precision │ 0.990415 │
│ 4 │ * Threshold │ 0.40199 │
│ 5 │ * Accuracy │ 0.97654 │
│ 6 │ * Cohen's Kappa statistic │ 0.938018 │
│ 7 │ [max] * F1 score │ 0.968992 │
│ 8 │ * Precision (positive predictive value) │ 0.94697 │
│ 9 │ * Negative predictive value │ 0.995215 │
│ 10 │ * Recall (sensitivity, true positive rate) │ 0.992063 │
│ 11 │ * Sensitivity (recall, true positive rate) │ 0.992063 │
│ 12 │ * Specificity (true negative rate) │ 0.967442 │
Info: 12×6 DataFrames.DataFrame. Omitted printing of 4 columns
│ Row │ metric │ Logistic regression │
├─────┼────────────────────────────────────────────┼─────────────────────┤
│ 1 │ AUPRC │ 0.990361 │
│ 2 │ AUROCC │ 0.995607 │
│ 3 │ Average precision │ 0.990415 │
│ 4 │ * Threshold │ 0.40199 │
│ 5 │ * Accuracy │ 0.97654 │
│ 6 │ [max] * Cohen's Kappa statistic │ 0.938018 │
│ 7 │ * F1 Score │ 0.968992 │
│ 8 │ * Precision (positive predictive value) │ 0.94697 │
│ 9 │ * Negative predictive value │ 0.995215 │
│ 10 │ * Recall (sensitivity, true positive rate) │ 0.992063 │
│ 11 │ * Sensitivity (recall, true positive rate) │ 0.992063 │
│ 12 │ * Specificity (true negative rate) │ 0.967442 │
Info: 12×6 DataFrames.DataFrame. Omitted printing of 4 columns
│ Row │ metric │ Logistic regression │
├─────┼──────────────────────────────────────────────────┼─────────────────────┤
│ 1 │ AUPRC │ 0.980282 │
│ 2 │ AUROCC │ 0.990163 │
│ 3 │ Average precision │ 0.980464 │
│ 4 │ * Threshold │ 0.313116 │
│ 5 │ * Accuracy │ 0.94152 │
│ 6 │ * Cohen's Kappa statistic │ 0.836242 │
│ 7 │ * F1 Score │ 0.918033 │
│ 8 │ * Precision (positive predictive value) │ 0.888889 │
│ 9 │ * Negative predictive value │ 0.972222 │
│ 10 │ * Recall (sensitivity, true positive rate) │ 0.949153 │
│ 11 │ [fix] * Sensitivity (recall, true positive rate) │ 0.949153 │
│ 12 │ * Specificity (true negative rate) │ 0.9375 │
Info: 12×6 DataFrames.DataFrame. Omitted printing of 4 columns
│ Row │ metric │ Logistic regression │
├─────┼────────────────────────────────────────────┼─────────────────────┤
│ 1 │ AUPRC │ 0.980282 │
│ 2 │ AUROCC │ 0.990163 │
│ 3 │ Average precision │ 0.980464 │
│ 4 │ * Threshold │ 0.435877 │
│ 5 │ * Accuracy │ 0.94152 │
│ 6 │ * Cohen's Kappa statistic │ 0.83338 │
│ 7 │ * F1 Score │ 0.916667 │
│ 8 │ * Precision (positive predictive value) │ 0.901639 │
│ 9 │ * Negative predictive value │ 0.963636 │
│ 10 │ * Recall (sensitivity, true positive rate) │ 0.932203 │
│ 11 │ * Sensitivity (recall, true positive rate) │ 0.932203 │
│ 12 │ [fix] * Specificity (true negative rate) │ 0.946429 │
Info: 12×6 DataFrames.DataFrame. Omitted printing of 4 columns
│ Row │ metric │ Logistic regression │
├─────┼────────────────────────────────────────────┼─────────────────────┤
│ 1 │ AUPRC │ 0.980282 │
│ 2 │ AUROCC │ 0.990163 │
│ 3 │ Average precision │ 0.980464 │
│ 4 │ * Threshold │ 0.738787 │
│ 5 │ * Accuracy │ 0.959064 │
│ 6 │ * Cohen's Kappa statistic │ 0.880351 │
│ 7 │ [max] * F1 score │ 0.940171 │
│ 8 │ * Precision (positive predictive value) │ 0.948276 │
│ 9 │ * Negative predictive value │ 0.964602 │
│ 10 │ * Recall (sensitivity, true positive rate) │ 0.932203 │
│ 11 │ * Sensitivity (recall, true positive rate) │ 0.932203 │
│ 12 │ * Specificity (true negative rate) │ 0.973214 │
Info: 12×6 DataFrames.DataFrame. Omitted printing of 4 columns
│ Row │ metric │ Logistic regression │
├─────┼────────────────────────────────────────────┼─────────────────────┤
│ 1 │ AUPRC │ 0.980282 │
│ 2 │ AUROCC │ 0.990163 │
│ 3 │ Average precision │ 0.980464 │
│ 4 │ * Threshold │ 0.738787 │
│ 5 │ * Accuracy │ 0.959064 │
│ 6 │ [max] * Cohen's Kappa statistic │ 0.880351 │
│ 7 │ * F1 Score │ 0.940171 │
│ 8 │ * Precision (positive predictive value) │ 0.948276 │
│ 9 │ * Negative predictive value │ 0.964602 │
│ 10 │ * Recall (sensitivity, true positive rate) │ 0.932203 │
│ 11 │ * Sensitivity (recall, true positive rate) │ 0.932203 │
│ 12 │ * Specificity (true negative rate) │ 0.973214 │
This page was generated using Literate.jl.