# This file was generated by PredictMD version 0.34.21
# For help, please visit https://predictmd.net
using PredictMDExtra
PredictMDExtra.import_all()
using PredictMD
PredictMD.import_all()
### Begin project-specific settings
DIRECTORY_CONTAINING_THIS_FILE = @__DIR__
PROJECT_DIRECTORY = dirname(
joinpath(splitpath(DIRECTORY_CONTAINING_THIS_FILE)...)
)
PROJECT_OUTPUT_DIRECTORY = joinpath(
PROJECT_DIRECTORY,
"output",
)
mkpath(PROJECT_OUTPUT_DIRECTORY)
mkpath(joinpath(PROJECT_OUTPUT_DIRECTORY, "data"))
mkpath(joinpath(PROJECT_OUTPUT_DIRECTORY, "models"))
mkpath(joinpath(PROJECT_OUTPUT_DIRECTORY, "plots"))
### End project-specific settings
### Begin logistic classifier code
Random.seed!(999)
trainingandtuning_features_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"trainingandtuning_features_df.csv",
)
trainingandtuning_labels_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"trainingandtuning_labels_df.csv",
)
testing_features_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"testing_features_df.csv",
)
testing_labels_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"testing_labels_df.csv",
)
training_features_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"training_features_df.csv",
)
training_labels_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"training_labels_df.csv",
)
tuning_features_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"tuning_features_df.csv",
)
tuning_labels_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"tuning_labels_df.csv",
)
trainingandtuning_features_df = DataFrames.DataFrame(
FileIO.load(
trainingandtuning_features_df_filename;
type_detect_rows = 100,
)
)
trainingandtuning_labels_df = DataFrames.DataFrame(
FileIO.load(
trainingandtuning_labels_df_filename;
type_detect_rows = 100,
)
)
testing_features_df = DataFrames.DataFrame(
FileIO.load(
testing_features_df_filename;
type_detect_rows = 100,
)
)
testing_labels_df = DataFrames.DataFrame(
FileIO.load(
testing_labels_df_filename;
type_detect_rows = 100,
)
)
training_features_df = DataFrames.DataFrame(
FileIO.load(
training_features_df_filename;
type_detect_rows = 100,
)
)
training_labels_df = DataFrames.DataFrame(
FileIO.load(
training_labels_df_filename;
type_detect_rows = 100,
)
)
tuning_features_df = DataFrames.DataFrame(
FileIO.load(
tuning_features_df_filename;
type_detect_rows = 100,
)
)
tuning_labels_df = DataFrames.DataFrame(
FileIO.load(
tuning_labels_df_filename;
type_detect_rows = 100,
)
)
smoted_training_features_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"smoted_training_features_df.csv",
)
smoted_training_labels_df_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"smoted_training_labels_df.csv",
)
smoted_training_features_df = DataFrames.DataFrame(
FileIO.load(
smoted_training_features_df_filename;
type_detect_rows = 100,
)
)
smoted_training_labels_df = DataFrames.DataFrame(
FileIO.load(
smoted_training_labels_df_filename;
type_detect_rows = 100,
)
)
categorical_feature_names_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"categorical_feature_names.jld2",
)
continuous_feature_names_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"data",
"continuous_feature_names.jld2",
)
categorical_feature_names = FileIO.load(
categorical_feature_names_filename,
"categorical_feature_names",
)
continuous_feature_names = FileIO.load(
continuous_feature_names_filename,
"continuous_feature_names",
)
feature_names = vcat(categorical_feature_names, continuous_feature_names)
single_label_name = :Class
negative_class = "benign"
positive_class = "malignant"
single_label_levels = [negative_class, positive_class]
categorical_label_names = Symbol[single_label_name]
continuous_label_names = Symbol[]
label_names = vcat(categorical_label_names, continuous_label_names)
feature_contrasts = PredictMD.generate_feature_contrasts(
smoted_training_features_df,
feature_names,
)
show(
PredictMD.linearly_dependent_columns(
training_features_df,
feature_names,
)
)
logistic_classifier =
PredictMD.singlelabelbinaryclassdataframelogisticclassifier(
feature_names,
single_label_name,
single_label_levels;
package = :GLM,
intercept = true,
interactions = 1,
name = "Logistic regression",
)
PredictMD.fit!(logistic_classifier,
smoted_training_features_df,
smoted_training_labels_df) # TODO: fix this error
PredictMD.get_underlying(logistic_classifier) # TODO: fix this error
logistic_hist_training =
PredictMD.plotsinglelabelbinaryclassifierhistogram( # TODO: fix this error
logistic_classifier,
smoted_training_features_df,
smoted_training_labels_df,
single_label_name,
single_label_levels,
);
display(logistic_hist_training)
PredictMD.save_plot(
joinpath(
PROJECT_OUTPUT_DIRECTORY,
"plots",
"logistic_hist_training.pdf",
),
logistic_hist_training,
)
logistic_hist_testing =
PredictMD.plotsinglelabelbinaryclassifierhistogram(
logistic_classifier,
testing_features_df,
testing_labels_df,
single_label_name,
single_label_levels,
);
display(logistic_hist_testing)
PredictMD.save_plot(
joinpath(
PROJECT_OUTPUT_DIRECTORY,
"plots",
"logistic_hist_testing.pdf",
),
logistic_hist_testing,
)
show(
PredictMD.singlelabelbinaryclassificationmetrics(
logistic_classifier,
smoted_training_features_df,
smoted_training_labels_df,
single_label_name,
positive_class;
sensitivity = 0.95,
);
allrows = true,
allcols = true,
splitcols = false,
)
show(
PredictMD.singlelabelbinaryclassificationmetrics(
logistic_classifier,
testing_features_df,
testing_labels_df,
single_label_name,
positive_class;
sensitivity = 0.95,
);
allrows = true,
allcols = true,
splitcols = false,
)
logistic_calibration_curve =
PredictMD.plot_probability_calibration_curve(
logistic_classifier,
smoted_training_features_df,
smoted_training_labels_df,
single_label_name,
positive_class;
window = 0.2,
);
display(logistic_calibration_curve)
PredictMD.save_plot(
joinpath(
PROJECT_OUTPUT_DIRECTORY,
"plots",
"logistic_calibration_curve.pdf",
),
logistic_calibration_curve,
)
show(
PredictMD.probability_calibration_metrics(
logistic_classifier,
testing_features_df,
testing_labels_df,
single_label_name,
positive_class;
window = 0.1,
);
allrows = true,
allcols = true,
splitcols = false,
)
logistic_cutoffs, logistic_risk_group_prevalences =
PredictMD.risk_score_cutoff_values(
logistic_classifier,
testing_features_df,
testing_labels_df,
single_label_name,
positive_class;
average_function = Statistics.mean,
)
@info(
string(
"Low risk: 0 to $(logistic_cutoffs[1]).",
" Medium risk: $(logistic_cutoffs[1]) to $(logistic_cutoffs[2]).",
" High risk: $(logistic_cutoffs[2]) to 1.",
)
)
@info(logistic_risk_group_prevalences)
logistic_cutoffs, logistic_risk_group_prevalences =
PredictMD.risk_score_cutoff_values(
logistic_classifier,
testing_features_df,
testing_labels_df,
single_label_name,
positive_class;
average_function = Statistics.median,
)
@info(
string(
"Low risk: 0 to $(logistic_cutoffs[1]).",
" Medium risk: $(logistic_cutoffs[1]) to $(logistic_cutoffs[2]).",
" High risk: $(logistic_cutoffs[2]) to 1.",
)
)
@info(logistic_risk_group_prevalences)
logistic_classifier_filename = joinpath(
PROJECT_OUTPUT_DIRECTORY,
"models",
"logistic_classifier.jld2",
)
PredictMD.save_model(logistic_classifier_filename, logistic_classifier)
### End logistic classifier code
# This file was generated by PredictMD version 0.34.21
# For help, please visit https://predictmd.net
This page was generated using Literate.jl.