This is an extension of logistic regression (2 groups) to the problem of modeling multiple groups. Here we have the response variable \(Y_i\) possibly taking \(G\) differnet values. We also have explanatory variables \((X_{i1}, X_{i2},...,X_{ik})\). In this case, a base category (e.g. group 1) is fixed and the model is formulated as follows and estimation is by maximum likelihood method.
\[ \log\frac{P(Y_i=j)}{P(Y_i=1)}=\beta_{0j}+ \beta_{1j}X_{i1} + \beta_{2j}X_{i2}+...+\beta_{kj}X_{ik} \]
We consider the iris data of R. Fisher as an example.
library(kernlab) # to get spam data
library(MASS)
library(nnet)
data(iris)
X<-iris
speciesnames<-as.matrix(unique(X$Species))
fit<- multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data = X)
## # weights: 18 (10 variable)
## initial value 164.791843
## iter 10 value 16.177348
## iter 20 value 7.111438
## iter 30 value 6.182999
## iter 40 value 5.984028
## iter 50 value 5.961278
## iter 60 value 5.954900
## iter 70 value 5.951851
## iter 80 value 5.950343
## iter 90 value 5.949904
## iter 100 value 5.949867
## final value 5.949867
## stopped after 100 iterations
summary(fit)
## Call:
## multinom(formula = Species ~ Sepal.Length + Sepal.Width + Petal.Length +
## Petal.Width, data = X)
##
## Coefficients:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 18.69037 -5.458424 -8.707401 14.24477 -3.097684
## virginica -23.83628 -7.923634 -15.370769 23.65978 15.135301
##
## Std. Errors:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 34.97116 89.89215 157.0415 60.19170 45.48852
## virginica 35.76649 89.91153 157.1196 60.46753 45.93406
##
## Residual Deviance: 11.89973
## AIC: 31.89973
pred<-fitted.values(fit)
predclass<- speciesnames[apply(pred, 1, order)[3,]]
round(pred[1:10,],3)
## setosa versicolor virginica
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## 7 1 0 0
## 8 1 0 0
## 9 1 0 0
## 10 1 0 0
ct <- table(as.matrix(X$Species), predclass)
diag(prop.table(ct, 1))
## setosa versicolor virginica
## 1.00 0.98 0.98
# total percent correct
sum(diag(prop.table(ct)))
## [1] 0.9866667
prop.table(ct)
## predclass
## setosa versicolor virginica
## setosa 0.333333333 0.000000000 0.000000000
## versicolor 0.000000000 0.326666667 0.006666667
## virginica 0.000000000 0.006666667 0.326666667
Here, we use data on financials to model bond ratings
# Bond Rating
Y<-read.csv('./BondRating.csv')
names<-as.matrix(unique(Y$RATING)) # get names of the ratings in the dataset
X<-Y[,-2]
fit <- multinom(RATING ~ . , data=X)
## # weights: 84 (66 variable)
## initial value 157.618722
## iter 10 value 114.767290
## iter 20 value 92.387030
## iter 30 value 86.885106
## iter 40 value 79.003424
## iter 50 value 71.810807
## iter 60 value 70.220428
## iter 70 value 69.633917
## iter 80 value 68.875707
## iter 90 value 68.552217
## iter 100 value 68.405388
## final value 68.405388
## stopped after 100 iterations
fit # show results
## Call:
## multinom(formula = RATING ~ ., data = X)
##
## Coefficients:
## (Intercept) LOPMAR LFIXCHAR LGEARRAT LTDCAP LLEVER
## AA 11.737381 4.431246 0.4977734 28.43283 -72.366878 -8.332015
## AAA -19.707701 20.690291 -2.5219974 58.38269 -112.796310 -9.959435
## B -58.421478 1.313711 3.3796214 70.69563 12.375591 -49.795630
## BA 7.220959 3.544558 5.1771683 -73.29690 3.128234 42.927291
## BAA 79.796050 -14.604635 -2.0461711 -80.80969 -29.699923 44.097903
## C 25.756516 -3.298133 -1.9047725 -19.46262 6.768439 16.151566
## LCASHLTD LACIDRAT LCURRAT LRECTURN LASSLTD
## AA -3.244978 3.11965202 -1.098027 5.113555 19.50371
## AAA -14.954031 11.63956016 1.724744 13.595328 67.14946
## B -13.025453 2.93387785 -8.187397 -3.146939 72.03592
## BA -17.165621 4.96603100 -1.761685 1.379763 -54.10029
## BAA 9.371217 -21.52766013 21.401185 -15.070100 -93.32279
## C -1.205051 -0.06457617 -2.912479 -4.753741 -28.47914
##
## Residual Deviance: 136.8108
## AIC: 268.8108
#fit$class
pred<-fitted.values(fit)
cbind(X$RATING, pred)[1:10,]
## A AA AAA B BA
## 1 3 8.940352e-03 0.005031110 0.649699685 2.815172e-03 3.305989e-01
## 2 3 4.274266e-02 0.002944607 0.273645572 2.936816e-01 4.583117e-04
## 3 3 3.113283e-02 0.112335901 0.856010223 7.797141e-05 3.640326e-04
## 4 3 1.477013e-02 0.548161460 0.293098449 8.282590e-06 1.439614e-01
## 5 3 6.270380e-02 0.499508964 0.081322628 1.756883e-03 3.547074e-01
## 6 3 4.083808e-05 0.001216733 0.631699897 1.526545e-08 3.670425e-01
## 7 3 2.993539e-03 0.001027265 0.535386575 1.628278e-02 1.557380e-01
## 8 3 1.388466e-01 0.090157211 0.646175804 4.143425e-03 2.442525e-07
## 9 3 2.982332e-02 0.009845506 0.933604006 6.027175e-09 2.071931e-02
## 10 2 4.387836e-01 0.297447225 0.001600129 6.083785e-06 2.599889e-01
## BAA C
## 1 7.363268e-04 2.178456e-03
## 2 6.592238e-05 3.864613e-01
## 3 7.904102e-05 1.649999e-12
## 4 3.252608e-07 2.342778e-11
## 5 8.935060e-08 2.286929e-07
## 6 2.741646e-12 1.157556e-13
## 7 3.890999e-12 2.885718e-01
## 8 1.206670e-01 9.691384e-06
## 9 6.007827e-03 1.832441e-08
## 10 2.174018e-03 2.426333e-09
names<-colnames(pred)
predclass<- names[apply(pred, 1, order)[7,]] # Assigns X to the class with highest predicted probability
# confusion Matrix on development sample
ct <- table(as.matrix(X$RATING), predclass)
diag(prop.table(ct, 1))
## A AA AAA B BA BAA C
## 0.4166667 0.6153846 0.6666667 0.5454545 0.6923077 0.7692308 0.7000000
# total percent correct
sum(diag(prop.table(ct)))
## [1] 0.6296296
round(prop.table(ct),3)
## predclass
## A AA AAA B BA BAA C
## A 0.062 0.049 0.000 0.012 0.025 0.000 0.000
## AA 0.012 0.099 0.025 0.000 0.012 0.012 0.000
## AAA 0.000 0.025 0.074 0.000 0.000 0.000 0.012
## B 0.000 0.000 0.012 0.074 0.012 0.000 0.037
## BA 0.025 0.012 0.012 0.000 0.111 0.000 0.000
## BAA 0.000 0.012 0.012 0.000 0.012 0.123 0.000
## C 0.012 0.000 0.000 0.012 0.000 0.012 0.086
round(summary(fit)$coeff,2)
## (Intercept) LOPMAR LFIXCHAR LGEARRAT LTDCAP LLEVER LCASHLTD LACIDRAT
## AA 11.74 4.43 0.50 28.43 -72.37 -8.33 -3.24 3.12
## AAA -19.71 20.69 -2.52 58.38 -112.80 -9.96 -14.95 11.64
## B -58.42 1.31 3.38 70.70 12.38 -49.80 -13.03 2.93
## BA 7.22 3.54 5.18 -73.30 3.13 42.93 -17.17 4.97
## BAA 79.80 -14.60 -2.05 -80.81 -29.70 44.10 9.37 -21.53
## C 25.76 -3.30 -1.90 -19.46 6.77 16.15 -1.21 -0.06
## LCURRAT LRECTURN LASSLTD
## AA -1.10 5.11 19.50
## AAA 1.72 13.60 67.15
## B -8.19 -3.15 72.04
## BA -1.76 1.38 -54.10
## BAA 21.40 -15.07 -93.32
## C -2.91 -4.75 -28.48
summary(fit)$standard.errors
## (Intercept) LOPMAR LFIXCHAR LGEARRAT LTDCAP LLEVER LCASHLTD LACIDRAT
## AA 31.83374 3.235624 1.323922 30.70983 60.87541 16.22329 4.175166 3.910530
## AAA 19.61618 7.186434 2.464353 36.27346 40.79986 17.85427 6.171757 5.089015
## B 51.63854 3.852885 2.453736 58.82617 114.94067 27.72559 6.484654 4.880095
## BA 21.06603 2.908066 1.804241 43.12921 36.15916 22.68562 5.246616 3.842039
## BAA 55.45235 6.989684 1.861628 45.34033 105.38147 22.09295 7.422105 9.625395
## C 191.42933 3.778619 3.547979 96.62102 373.10166 26.27910 6.503547 4.483231
## LCURRAT LRECTURN LASSLTD
## AA 4.100977 3.227411 27.50994
## AAA 5.007507 5.064849 34.75602
## B 6.129798 4.273424 42.79773
## BA 4.496332 3.263992 37.75283
## BAA 8.585059 6.828358 42.01908
## C 5.203328 4.079773 42.89455
Y<-read.csv('BondRating.csv')
names<-as.matrix(unique(Y$RATING))
X<-Y[,-1]
fit <- polr(ordered(CODERTG) ~ . , data=X)
fit # show results
## Call:
## polr(formula = ordered(CODERTG) ~ ., data = X)
##
## Coefficients:
## LOPMAR LFIXCHAR LGEARRAT LTDCAP LLEVER LCASHLTD
## -1.7334093 0.3323669 -12.8942367 39.0702649 2.4168597 -0.9868148
## LACIDRAT LCURRAT LRECTURN LASSLTD
## -1.5229496 -0.3702369 -2.0676946 -7.6601086
##
## Intercepts:
## 1|2 2|3 3|4 4|5 5|6 6|7
## 8.469632 9.911288 10.845631 11.827710 13.229760 14.874021
##
## Residual Deviance: 254.7475
## AIC: 286.7475
t(round(fit$coeff,2))
## LOPMAR LFIXCHAR LGEARRAT LTDCAP LLEVER LCASHLTD LACIDRAT LCURRAT LRECTURN
## [1,] -1.73 0.33 -12.89 39.07 2.42 -0.99 -1.52 -0.37 -2.07
## LASSLTD
## [1,] -7.66
pred<-fitted.values(fit)
predclass<- apply(pred, 1, order)[7,]
summary(fit)
##
## Re-fitting to get Hessian
## Call:
## polr(formula = ordered(CODERTG) ~ ., data = X)
##
## Coefficients:
## Value Std. Error t value
## LOPMAR -1.7334 0.8072 -2.1474
## LFIXCHAR 0.3324 0.5274 0.6302
## LGEARRAT -12.8942 8.7291 -1.4772
## LTDCAP 39.0703 10.7471 3.6354
## LLEVER 2.4169 4.6371 0.5212
## LCASHLTD -0.9868 1.2307 -0.8018
## LACIDRAT -1.5229 1.1259 -1.3526
## LCURRAT -0.3702 1.4168 -0.2613
## LRECTURN -2.0677 0.7688 -2.6893
## LASSLTD -7.6601 7.4245 -1.0317
##
## Intercepts:
## Value Std. Error t value
## 1|2 8.4696 4.7781 1.7726
## 2|3 9.9113 4.7765 2.0750
## 3|4 10.8456 4.7772 2.2703
## 4|5 11.8277 4.7965 2.4659
## 5|6 13.2298 4.8861 2.7077
## 6|7 14.8740 4.9960 2.9772
##
## Residual Deviance: 254.7475
## AIC: 286.7475
# confusion Matrix on development sample
ct <- table((X$CODERTG), predclass)
diag(prop.table(ct, 1))
## 1 2 3 4 5 6 7
## 0.44444444 0.53846154 0.00000000 0.07692308 0.46153846 0.36363636 0.40000000
# total percent correct
sum(diag(prop.table(ct)))
## [1] 0.3209877
xx<-fitted.values(fit)[1,]
xi<-X[1,2:11]
xi
## LOPMAR LFIXCHAR LGEARRAT LTDCAP LLEVER LCASHLTD LACIDRAT LCURRAT LRECTURN
## 1 -1.663 0.749 -0.491 0.378 0.16 -1.225 0.433 1.12 1.629
## LASSLTD
## 1 1.277
ee1<- 8.469632-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee2<- 9.911288-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee3<- 10.8456-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee4<- 11.8277-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee5<- 13.2298-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee6<- 14.8740-sum(as.numeric(xi)* as.numeric(fit$coeff))
cum_p<-c( 0, exp(ee1)/(1+exp(ee1)), exp(ee2)/(1+exp(ee2)), exp(ee3)/(1+exp(ee3)), exp(ee4)/(1+exp(ee4)), exp(ee5)/(1+exp(ee5)), exp(ee6)/(1+exp(ee6)),1)
cum_p[-1]-cum_p[-8]
## [1] 0.04177394 0.11385008 0.16370808 0.23674526 0.27972991 0.12763279 0.03655994
pred[1,]
## 1 2 3 4 5 6 7
## 0.04177396 0.11385000 0.16371493 0.23674086 0.27972202 0.12763904 0.03655919