##### Beginning of file

# This file was generated by PredictMD version 0.19.0
# For help, please visit https://www.predictmd.net

import PredictMD

### Begin project-specific settings

PredictMD.require_julia_version("v0.6")

PredictMD.require_predictmd_version("0.19.0")

# If you also want to enforce an upper bound on the PredictMD version,
# you can instead use the two-argument form:
# PredictMD.require_predictmd_version("0.19.0", "0.20.0-")

PROJECT_OUTPUT_DIRECTORY = PredictMD.project_directory(
    homedir(),
    "Desktop",
    "breast_cancer_biopsy_example",
    )

### End project-specific settings

### Begin Knet neural network classifier code

import CSV
import Compat
import DataFrames
import FileIO
import JLD2
import Knet

# Fix the seed of the global random number generator so that the random
# weight initialization below (and therefore the training run) is reproducible.
srand(999)

trainingandvalidation_features_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "trainingandvalidation_features_df.csv",
    )
trainingandvalidation_labels_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "trainingandvalidation_labels_df.csv",
    )
testing_features_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "testing_features_df.csv",
    )
testing_labels_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "testing_labels_df.csv",
    )
training_features_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "training_features_df.csv",
    )
training_labels_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "training_labels_df.csv",
    )
validation_features_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "validation_features_df.csv",
    )
validation_labels_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "validation_labels_df.csv",
    )
trainingandvalidation_features_df = CSV.read(
    trainingandvalidation_features_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
trainingandvalidation_labels_df = CSV.read(
    trainingandvalidation_labels_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
testing_features_df = CSV.read(
    testing_features_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
testing_labels_df = CSV.read(
    testing_labels_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
training_features_df = CSV.read(
    training_features_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
training_labels_df = CSV.read(
    training_labels_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
validation_features_df = CSV.read(
    validation_features_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
validation_labels_df = CSV.read(
    validation_labels_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
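
# Optional sketch (not part of the generated file): print the size of each
# split as a quick sanity check; `size(df)` returns (rows, columns).
for (split_name, df) in [
        ("training", training_features_df),
        ("validation", validation_features_df),
        ("testing", testing_features_df),
        ]
    println(split_name, ": ", size(df))
end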

smoted_training_features_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "smoted_training_features_df.csv",
    )
smoted_training_labels_df_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "smoted_training_labels_df.csv",
    )
smoted_training_features_df = CSV.read(
    smoted_training_features_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
smoted_training_labels_df = CSV.read(
    smoted_training_labels_df_filename,
    DataFrames.DataFrame;
    rows_for_type_detect = 100,
    )
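
# Optional sketch (not part of the generated file): the "smoted" data frames
# were produced by SMOTE oversampling, so the two classes should now be
# roughly balanced. Assuming the installed DataFrames version provides `by`,
# this counts the rows in each class:
println(
    DataFrames.by(
        smoted_training_labels_df,
        :Class,
        DataFrames.nrow,
        )
    )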

categorical_feature_names_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "categorical_feature_names.jld2",
    )
continuous_feature_names_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "continuous_feature_names.jld2",
    )
categorical_feature_names = FileIO.load(
    categorical_feature_names_filename,
    "categorical_feature_names",
    )
continuous_feature_names = FileIO.load(
    continuous_feature_names_filename,
    "continuous_feature_names",
    )
feature_names = vcat(categorical_feature_names, continuous_feature_names)

single_label_name = :Class
negative_class = "benign"
positive_class = "malignant"
single_label_levels = [negative_class, positive_class]

categorical_label_names = Symbol[single_label_name]
continuous_label_names = Symbol[]
label_names = vcat(categorical_label_names, continuous_label_names)

knet_mlp_predict_function_source = """
function knetmlp_predict(
        w,
        x0::AbstractArray;
        probabilities::Bool = true,
        )
    x1 = Knet.relu.( w[1]*x0 .+ w[2] )
    x2 = Knet.relu.( w[3]*x1 .+ w[4] )
    x3 = w[5]*x2 .+ w[6]
    unnormalizedlogprobs = x3
    if probabilities
        normalizedlogprobs = Knet.logp(unnormalizedlogprobs, 1)
        normalizedprobs = exp.(normalizedlogprobs)
        return normalizedprobs
    else
        return unnormalizedlogprobs
    end
end
"""

knet_mlp_loss_function_source = """
function knetmlp_loss(
        predict::Function,
        modelweights,
        x::AbstractArray,
        ytrue::AbstractArray;
        L1::Real = Cfloat(0),
        L2::Real = Cfloat(0),
        )
    loss = Knet.nll(
        predict(
            modelweights,
            x;
            probabilities = false,
            ),
        ytrue,
        1,
        )
    if L1 != 0
        loss += L1 * sum(sum(abs, w_i) for w_i in modelweights[1:2:end])
    end
    if L2 != 0
        loss += L2 * sum(sum(abs2, w_i) for w_i in modelweights[1:2:end])
    end
    return loss
end
"""

feature_contrasts = PredictMD.generate_feature_contrasts(
    smoted_training_features_df,
    feature_names,
    )

knetmlp_modelweights = Any[
    Cfloat.(
        0.1f0*randn(Cfloat,64,feature_contrasts.num_array_columns)
        ),
    Cfloat.(
        zeros(Cfloat,64,1)
        ),
    Cfloat.(
        0.1f0*randn(Cfloat,32,64)
        ),
    Cfloat.(
        zeros(Cfloat,32,1)
        ),
    Cfloat.(
        0.1f0*randn(Cfloat,2,32)
        ),
    Cfloat.(
        zeros(Cfloat,2,1)
        ),
    ]
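
# Optional sketch (not part of the generated file): run the randomly
# initialized weights through the same forward pass as `knetmlp_predict` on a
# dummy minibatch, to confirm that the layer shapes line up before training.
let
    x_dummy = randn(Cfloat, feature_contrasts.num_array_columns, 5)
    x1 = Knet.relu.( knetmlp_modelweights[1]*x_dummy .+ knetmlp_modelweights[2] )
    x2 = Knet.relu.( knetmlp_modelweights[3]*x1 .+ knetmlp_modelweights[4] )
    x3 = knetmlp_modelweights[5]*x2 .+ knetmlp_modelweights[6]
    println(size(x3)) # (2, 5): two classes by five dummy samples
end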

knetmlp_losshyperparameters = Dict()
knetmlp_losshyperparameters[:L1] = Cfloat(0.0)
knetmlp_losshyperparameters[:L2] = Cfloat(0.0)

knetmlp_optimizationalgorithm = :Momentum
knetmlp_optimizerhyperparameters = Dict()
knetmlp_minibatchsize = 48
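
# The optimizer hyperparameter dictionary is left empty, so Knet's default
# settings for the Momentum optimizer are used (presumably its default
# learning rate and momentum coefficient); `knetmlp_minibatchsize = 48` is
# the number of SMOTE'd training rows used for each gradient update.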

knet_mlp_classifier =
    PredictMD.single_labelmulticlassdataframeknetclassifier(
        feature_names,
        single_label_name,
        single_label_levels;
        package = :Knet,
        name = "Knet MLP",
        predict_function_source = knet_mlp_predict_function_source,
        loss_function_source = knet_mlp_loss_function_source,
        losshyperparameters = knetmlp_losshyperparameters,
        optimizationalgorithm = knetmlp_optimizationalgorithm,
        optimizerhyperparameters = knetmlp_optimizerhyperparameters,
        minibatchsize = knetmlp_minibatchsize,
        modelweights = knetmlp_modelweights,
        printlosseverynepochs = 100,
        maxepochs = 200,
        feature_contrasts = feature_contrasts,
        )

PredictMD.parse_functions!(knet_mlp_classifier)

PredictMD.fit!(
    knet_mlp_classifier,
    smoted_training_features_df,
    smoted_training_labels_df,
    validation_features_df,
    validation_labels_df,
    )

PredictMD.set_max_epochs!(knet_mlp_classifier, 1_000)

PredictMD.fit!(
    knet_mlp_classifier,
    smoted_training_features_df,
    smoted_training_labels_df,
    validation_features_df,
    validation_labels_df,
    )
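
# Because the model object keeps its weights and training history, the second
# `PredictMD.fit!` call above resumes training at epoch 200 (where the first
# call stopped) and continues up to the new maximum of 1,000 epochs; see the
# training log at the bottom of this page.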

knet_learningcurve_lossvsepoch = PredictMD.plotlearningcurve(
    knet_mlp_classifier,
    :loss_vs_epoch;
    )
PredictMD.open_plot(knet_learningcurve_lossvsepoch)

knet_learningcurve_lossvsepoch_skip10epochs = PredictMD.plotlearningcurve(
    knet_mlp_classifier,
    :loss_vs_epoch;
    startat = 10,
    endat = :end,
    )
PredictMD.open_plot(knet_learningcurve_lossvsepoch_skip10epochs)

knet_learningcurve_lossvsiteration = PredictMD.plotlearningcurve(
    knet_mlp_classifier,
    :loss_vs_iteration;
    window = 50,
    sampleevery = 10,
    )
PredictMD.open_plot(knet_learningcurve_lossvsiteration)

knet_learningcurve_lossvsiteration_skip100iterations =
    PredictMD.plotlearningcurve(
        knet_mlp_classifier,
        :loss_vs_iteration;
        window = 50,
        sampleevery = 10,
        startat = 100,
        endat = :end,
        )
PredictMD.open_plot(knet_learningcurve_lossvsiteration_skip100iterations)
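
# In the two loss-vs-iteration plots, the `window` and `sampleevery` keyword
# arguments smooth and thin the noisy per-minibatch loss curve, while
# `startat`/`endat` restrict the plotted range so that the large losses from
# the very first epochs or iterations do not dominate the axes.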

knet_mlp_classifier_hist_training =
    PredictMD.plotsinglelabelbinaryclassifierhistogram(
        knet_mlp_classifier,
        smoted_training_features_df,
        smoted_training_labels_df,
        single_label_name,
        single_label_levels,
        )
PredictMD.open_plot(knet_mlp_classifier_hist_training)

knet_mlp_classifier_hist_testing =
    PredictMD.plotsinglelabelbinaryclassifierhistogram(
        knet_mlp_classifier,
        testing_features_df,
        testing_labels_df,
        single_label_name,
        single_label_levels,
        )
PredictMD.open_plot(knet_mlp_classifier_hist_testing)

PredictMD.singlelabelbinaryclassificationmetrics(
    knet_mlp_classifier,
    smoted_training_features_df,
    smoted_training_labels_df,
    single_label_name,
    positive_class;
    sensitivity = 0.95,
    )

PredictMD.singlelabelbinaryclassificationmetrics(
    knet_mlp_classifier,
    testing_features_df,
    testing_labels_df,
    single_label_name,
    positive_class;
    sensitivity = 0.95,
    )
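
# Passing `sensitivity = 0.95` reports the classification metrics at the
# probability threshold where sensitivity (recall for the malignant class)
# reaches at least 0.95, a natural operating point for a screening test.
# Comparing the training-set and testing-set metrics above gives a rough
# indication of how much the model has overfit the SMOTE'd training data.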

knet_mlp_classifier_filename = joinpath(
    PROJECT_OUTPUT_DIRECTORY,
    "knet_mlp_classifier.jld2",
    )

PredictMD.save_model(knet_mlp_classifier_filename, knet_mlp_classifier)
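
# Optional sketch (not part of the generated file): in a later Julia session
# the trained classifier can be restored from the JLD2 file. This assumes
# that `PredictMD.load_model` is the counterpart of `PredictMD.save_model`:
#
# knet_mlp_classifier = PredictMD.load_model(knet_mlp_classifier_filename)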

### End Knet neural network classifier code

##### End of file
Info: Starting to train Knet model. Max epochs: 200.
Info: Epoch: 0. Loss (training set): 0.7201029. Loss (validation set): 0.6927045935199466.
Info: Epoch: 100. Loss (training set): 0.09744535. Loss (validation set): 0.10499530052334453.
Info: Epoch: 200. Loss (training set): 0.059762415. Loss (validation set): 0.07322175006846152.
Info: Finished training Knet model.
Info: Starting to train Knet model. Max epochs: 1000.
Info: Epoch: 200. Loss (training set): 0.059762415. Loss (validation set): 0.07322175006846152.
Info: Epoch: 300. Loss (training set): 0.045931436. Loss (validation set): 0.06903422926611998.
Info: Epoch: 400. Loss (training set): 0.03827687. Loss (validation set): 0.06807030173017072.
Info: Epoch: 500. Loss (training set): 0.033273607. Loss (validation set): 0.06853613498526183.
Info: Epoch: 600. Loss (training set): 0.029120164. Loss (validation set): 0.06944961839841199.
Info: Epoch: 700. Loss (training set): 0.025723964. Loss (validation set): 0.06958666157365702.
Info: Epoch: 800. Loss (training set): 0.02273111. Loss (validation set): 0.07064465877708066.
Info: Epoch: 900. Loss (training set): 0.019935755. Loss (validation set): 0.07275781294036861.
Info: Epoch: 1000. Loss (training set): 0.017285056. Loss (validation set): 0.07562355137457422.
Info: Finished training Knet model.
Info: Attempting to save model...
Info: Saved model to file "/tmp/tmpxV7LRJ/PREDICTMDTEMPDIRECTORY/knet_mlp_classifier.jld2"

This page was generated using Literate.jl.