# This file was generated by PredictMD version 0.34.21
# For help, please visit https://predictmd.net
using PredictMDExtra
PredictMDExtra.import_all()
using PredictMD
PredictMD.import_all()
### Begin project-specific settings
# The project root is taken to be the parent of the directory holding this
# file; every generated artifact is written beneath `<project root>/output`.
DIRECTORY_CONTAINING_THIS_FILE = @__DIR__
PROJECT_DIRECTORY = dirname(joinpath(splitpath(DIRECTORY_CONTAINING_THIS_FILE)...))
PROJECT_OUTPUT_DIRECTORY = joinpath(PROJECT_DIRECTORY, "output")
# Make sure the output directory and each of its subdirectories exist.
mkpath(PROJECT_OUTPUT_DIRECTORY)
for subdirectory in ("data", "models", "plots")
    mkpath(joinpath(PROJECT_OUTPUT_DIRECTORY, subdirectory))
end
### End project-specific settings
### Begin data preprocessing code
# Fix the global RNG seed before any data handling takes place.
Random.seed!(999)
df = RDatasets.dataset("MASS", "biopsy")
# PredictMD requires that you provide your data in a DataFrame.
#
# If your data are in a CSV file (e.g. "data.csv"), load them into
# a DataFrame named `df` with:
# df = DataFrames.DataFrame(CSVFiles.load("data.csv"; type_detect_rows = 10_000))
#
# If your data are in a gzipped CSV file (e.g. "data.csv.gz"), load them into
# a DataFrame named `df` with:
# df = DataFrames.DataFrame(CSVFiles.load(CSVFiles.File(CSVFiles.format"CSV", "data.csv.gz"); type_detect_rows = 10_000))
#
# If your data are in some other format, use the appropriate Julia package to
# load your data into a DataFrame named `df`.

# Feature columns, grouped by kind. This example uses no categorical features.
categorical_feature_names = Symbol[]
continuous_feature_names = Symbol[:V1, :V2, :V3, :V4, :V5, :V6, :V7, :V8, :V9]

# Save each list of feature names under the "data" output subdirectory.
categorical_feature_names_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "categorical_feature_names.jld2")
FileIO.save(
    categorical_feature_names_filename,
    "categorical_feature_names",
    categorical_feature_names,
)
continuous_feature_names_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "continuous_feature_names.jld2")
FileIO.save(
    continuous_feature_names_filename,
    "continuous_feature_names",
    continuous_feature_names,
)
feature_names = vcat(categorical_feature_names, continuous_feature_names)

# A single categorical label column (`:Class`) with two levels; there are no
# continuous labels.
single_label_name = :Class
negative_class = "benign"
positive_class = "malignant"
single_label_levels = [negative_class, positive_class]
categorical_label_names = Symbol[single_label_name]
continuous_label_names = Symbol[]
label_names = vcat(categorical_label_names, continuous_label_names)
# Keep only the declared feature and label columns (features first).
df = df[:, vcat(feature_names, label_names)]
# Remove every row that contains a missing value, and narrow the column
# element types so that they no longer admit `missing`.
DataFrames.dropmissing!(df; disallowmissing = true)
PredictMD.shuffle_rows!(df)
# Convert the columns according to their declared roles, then verify the result.
PredictMD.fix_column_types!(
    df;
    categorical_feature_names = categorical_feature_names,
    continuous_feature_names = continuous_feature_names,
    categorical_label_names = categorical_label_names,
    continuous_label_names = continuous_label_names,
)
PredictMD.check_column_types(
    df;
    categorical_feature_names = categorical_feature_names,
    continuous_feature_names = continuous_feature_names,
    categorical_label_names = categorical_label_names,
    continuous_label_names = continuous_label_names,
)
# Use explicit `df[:, cols]` indexing. The single-argument form `df[cols]` is
# deprecated (and removed in later DataFrames.jl releases), and the
# two-argument form matches the column selection made above.
features_df = df[:, feature_names]
labels_df = df[:, label_names]
# First split: (training + tuning) versus testing.
# NOTE(review): 0.75 is presumably the fraction of rows assigned to the first
# returned pair -- confirm against `PredictMD.split_data`.
(
    trainingandtuning_features_df,
    trainingandtuning_labels_df,
    testing_features_df,
    testing_labels_df,
) = PredictMD.split_data(features_df, labels_df, 0.75)
# Second split: divide the (training + tuning) rows into training and tuning,
# using a 2/3 split argument.
(
    training_features_df,
    training_labels_df,
    tuning_features_df,
    tuning_labels_df,
) = PredictMD.split_data(
    trainingandtuning_features_df,
    trainingandtuning_labels_df,
    2/3,
)
# Destination CSV files for each data subset, all under `<output>/data`.
trainingandtuning_features_df_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "trainingandtuning_features_df.csv")
trainingandtuning_labels_df_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "trainingandtuning_labels_df.csv")
testing_features_df_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "testing_features_df.csv")
testing_labels_df_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "testing_labels_df.csv")
training_features_df_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "training_features_df.csv")
training_labels_df_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "training_labels_df.csv")
tuning_features_df_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "tuning_features_df.csv")
tuning_labels_df_filename =
    joinpath(PROJECT_OUTPUT_DIRECTORY, "data", "tuning_labels_df.csv")

# Write each table to its file, in the order listed.
for (filename, table) in (
    (trainingandtuning_features_df_filename, trainingandtuning_features_df),
    (trainingandtuning_labels_df_filename, trainingandtuning_labels_df),
    (testing_features_df_filename, testing_features_df),
    (testing_labels_df_filename, testing_labels_df),
    (training_features_df_filename, training_features_df),
    (training_labels_df_filename, training_labels_df),
    (tuning_features_df_filename, tuning_features_df),
    (tuning_labels_df_filename, tuning_labels_df),
)
    FileIO.save(filename, table)
end
### End data preprocessing code
# This file was generated by PredictMD version 0.34.21
# For help, please visit https://predictmd.net
# This page was generated using Literate.jl.