Multinomial logistic regression

import MaximumLikelihoodProblems

import Distributions
import ForwardDiff
import LogDensityProblems
import NNlib
import TransformVariables

struct MultinomialLogisticRegression{Ty, TX}
    y::Ty
    X::TX
end

function (problem::MultinomialLogisticRegression)(θ)
    y = problem.y
    X = problem.X

    β = θ.β

    num_rows = size(X, 1)
    num_covariates = size(β, 1) # `size(β, 1)` is equal to `size(X, 2)`
    num_classes = size(β, 2) + 1

    # the first column of all zeros corresponds to the base class
    # i.e. the coefficient β₀ for the base class is always fixed to be zero
    β_with_base_class = hcat(zeros(num_covariates), β)
    η = X * β_with_base_class

    μ = NNlib.softmax(η; dims=2)
    log_likelihood = sum([Distributions.logpdf(Distributions.Multinomial(1, μ[i, :]), y[i, :]) for i = 1:num_rows])
    return log_likelihood
end

N = 10_000

# the first column (the column of all ones) is the intercept
X = hcat(ones(N), randn(N))

size_β = (2, 3)
β_true = [1.0 2.0 3.0; 4.0 5.0 6.0]
num_covariates = size(β_true, 1)
β_true_with_base_class = hcat(zeros(num_covariates), β_true)
η_true = X * β_true_with_base_class
μ_true = NNlib.softmax(η_true; dims=2)
y = vcat([rand(Distributions.Multinomial(1, μ_true[i,:]))' for i in 1:N]...)

problem = MultinomialLogisticRegression(y, X)
transformation = TransformVariables.as((β = TransformVariables.as(Array, size_β), ))
transformed_problem = LogDensityProblems.TransformedLogDensity(transformation,
                                                               problem)
transformed_gradient_problem = LogDensityProblems.ADgradient(:ForwardDiff,
                                                             transformed_problem)

β_hat_initial_guess = zeros(size_β)
θ_hat_initial = (; β = β_hat_initial_guess)
(β = [0.0 0.0 0.0; 0.0 0.0 0.0],)

θhat: (I recommend changing the value of `showprogress_metertotrue` so that you can see the progress.)

θ_hat = MaximumLikelihoodProblems.fit(transformed_gradient_problem,
                                      θ_hat_initial;
                                      show_progress_meter = false) # change this `false` to `true`
(β = [0.9575972848620831 2.02295593732928 2.9920377512384904; 3.9787586087579543 5.003698188645785 6.0448436626205355],)

β_hat:

β_hat = θ_hat[:β]
2×3 Array{Float64,2}:
 0.957597  2.02296  2.99204
 3.97876   5.0037   6.04484

β_hat_with_base_class:

num_covariates = size(β_hat, 1)
β_hat_with_base_class = hcat(zeros(num_covariates), β_hat)
2×4 Array{Float64,2}:
 0.0  0.957597  2.02296  2.99204
 0.0  3.97876   5.0037   6.04484

Value of the log likelihood function evaluated at θ_hat:

MaximumLikelihoodProblems.loglikelihood(transformed_gradient_problem, θ_hat)
-6777.968835061336

This page was generated using Literate.jl.