This is an extension of logistic regression (2 groups) to the problem of modeling multiple groups. Here we have the response variable \(Y_i\) possibly taking \(G\) differnet values. We also have explanatory variables \((X_{i1}, X_{i2},...,X_{ik})\). In this case, a base category (e.g. group 1) is fixed and the model is formulated as follows and estimation is by maximum likelihood method.

\[ \log\frac{P(Y_i=j)}{P(Y_i=1)}=\beta_{0j}+ \beta_{1j}X_{i1} + \beta_{2j}X_{i2}+...+\beta_{kj}X_{ik} \]

Example 1

We consider the iris data of R. Fisher as an example.

library(kernlab)   # to get spam data
library(MASS)
library(nnet)

data(iris)
X<-iris
speciesnames<-as.matrix(unique(X$Species))
fit<- multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data = X)
## # weights:  18 (10 variable)
## initial  value 164.791843 
## iter  10 value 16.177348
## iter  20 value 7.111438
## iter  30 value 6.182999
## iter  40 value 5.984028
## iter  50 value 5.961278
## iter  60 value 5.954900
## iter  70 value 5.951851
## iter  80 value 5.950343
## iter  90 value 5.949904
## iter 100 value 5.949867
## final  value 5.949867 
## stopped after 100 iterations
summary(fit)
## Call:
## multinom(formula = Species ~ Sepal.Length + Sepal.Width + Petal.Length + 
##     Petal.Width, data = X)
## 
## Coefficients:
##            (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor    18.69037    -5.458424   -8.707401     14.24477   -3.097684
## virginica    -23.83628    -7.923634  -15.370769     23.65978   15.135301
## 
## Std. Errors:
##            (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor    34.97116     89.89215    157.0415     60.19170    45.48852
## virginica     35.76649     89.91153    157.1196     60.46753    45.93406
## 
## Residual Deviance: 11.89973 
## AIC: 31.89973
pred<-fitted.values(fit)
predclass<- speciesnames[apply(pred, 1, order)[3,]]
round(pred[1:10,],3)
##    setosa versicolor virginica
## 1       1          0         0
## 2       1          0         0
## 3       1          0         0
## 4       1          0         0
## 5       1          0         0
## 6       1          0         0
## 7       1          0         0
## 8       1          0         0
## 9       1          0         0
## 10      1          0         0

Confusion matrix

ct <- table(as.matrix(X$Species), predclass)
diag(prop.table(ct, 1))
##     setosa versicolor  virginica 
##       1.00       0.98       0.98
# total percent correct
sum(diag(prop.table(ct)))
## [1] 0.9866667
prop.table(ct)
##             predclass
##                   setosa  versicolor   virginica
##   setosa     0.333333333 0.000000000 0.000000000
##   versicolor 0.000000000 0.326666667 0.006666667
##   virginica  0.000000000 0.006666667 0.326666667

Example 2

Here, we use data on financials to model bond ratings

# Bond Rating 
Y<-read.csv('./BondRating.csv')
names<-as.matrix(unique(Y$RATING))  # get names of the ratings in the dataset

Model fit and confusion matrix

X<-Y[,-2]

fit <- multinom(RATING ~ . , data=X)
## # weights:  84 (66 variable)
## initial  value 157.618722 
## iter  10 value 114.767290
## iter  20 value 92.387030
## iter  30 value 86.885106
## iter  40 value 79.003424
## iter  50 value 71.810807
## iter  60 value 70.220428
## iter  70 value 69.633917
## iter  80 value 68.875707
## iter  90 value 68.552217
## iter 100 value 68.405388
## final  value 68.405388 
## stopped after 100 iterations
fit # show results
## Call:
## multinom(formula = RATING ~ ., data = X)
## 
## Coefficients:
##     (Intercept)     LOPMAR   LFIXCHAR  LGEARRAT      LTDCAP     LLEVER
## AA    11.737381   4.431246  0.4977734  28.43283  -72.366878  -8.332015
## AAA  -19.707701  20.690291 -2.5219974  58.38269 -112.796310  -9.959435
## B    -58.421478   1.313711  3.3796214  70.69563   12.375591 -49.795630
## BA     7.220959   3.544558  5.1771683 -73.29690    3.128234  42.927291
## BAA   79.796050 -14.604635 -2.0461711 -80.80969  -29.699923  44.097903
## C     25.756516  -3.298133 -1.9047725 -19.46262    6.768439  16.151566
##       LCASHLTD     LACIDRAT   LCURRAT   LRECTURN   LASSLTD
## AA   -3.244978   3.11965202 -1.098027   5.113555  19.50371
## AAA -14.954031  11.63956016  1.724744  13.595328  67.14946
## B   -13.025453   2.93387785 -8.187397  -3.146939  72.03592
## BA  -17.165621   4.96603100 -1.761685   1.379763 -54.10029
## BAA   9.371217 -21.52766013 21.401185 -15.070100 -93.32279
## C    -1.205051  -0.06457617 -2.912479  -4.753741 -28.47914
## 
## Residual Deviance: 136.8108 
## AIC: 268.8108
#fit$class
pred<-fitted.values(fit)
cbind(X$RATING, pred)[1:10,]
##                 A          AA         AAA            B           BA
## 1  3 8.940352e-03 0.005031110 0.649699685 2.815172e-03 3.305989e-01
## 2  3 4.274266e-02 0.002944607 0.273645572 2.936816e-01 4.583117e-04
## 3  3 3.113283e-02 0.112335901 0.856010223 7.797141e-05 3.640326e-04
## 4  3 1.477013e-02 0.548161460 0.293098449 8.282590e-06 1.439614e-01
## 5  3 6.270380e-02 0.499508964 0.081322628 1.756883e-03 3.547074e-01
## 6  3 4.083808e-05 0.001216733 0.631699897 1.526545e-08 3.670425e-01
## 7  3 2.993539e-03 0.001027265 0.535386575 1.628278e-02 1.557380e-01
## 8  3 1.388466e-01 0.090157211 0.646175804 4.143425e-03 2.442525e-07
## 9  3 2.982332e-02 0.009845506 0.933604006 6.027175e-09 2.071931e-02
## 10 2 4.387836e-01 0.297447225 0.001600129 6.083785e-06 2.599889e-01
##             BAA            C
## 1  7.363268e-04 2.178456e-03
## 2  6.592238e-05 3.864613e-01
## 3  7.904102e-05 1.649999e-12
## 4  3.252608e-07 2.342778e-11
## 5  8.935060e-08 2.286929e-07
## 6  2.741646e-12 1.157556e-13
## 7  3.890999e-12 2.885718e-01
## 8  1.206670e-01 9.691384e-06
## 9  6.007827e-03 1.832441e-08
## 10 2.174018e-03 2.426333e-09
names<-colnames(pred)

predclass<- names[apply(pred, 1, order)[7,]]  # Assigns X to the class with highest predicted probability




# confusion Matrix on development sample
ct <- table(as.matrix(X$RATING), predclass)
diag(prop.table(ct, 1))
##         A        AA       AAA         B        BA       BAA         C 
## 0.4166667 0.6153846 0.6666667 0.5454545 0.6923077 0.7692308 0.7000000
# total percent correct
sum(diag(prop.table(ct)))
## [1] 0.6296296
round(prop.table(ct),3)
##      predclass
##           A    AA   AAA     B    BA   BAA     C
##   A   0.062 0.049 0.000 0.012 0.025 0.000 0.000
##   AA  0.012 0.099 0.025 0.000 0.012 0.012 0.000
##   AAA 0.000 0.025 0.074 0.000 0.000 0.000 0.012
##   B   0.000 0.000 0.012 0.074 0.012 0.000 0.037
##   BA  0.025 0.012 0.012 0.000 0.111 0.000 0.000
##   BAA 0.000 0.012 0.012 0.000 0.012 0.123 0.000
##   C   0.012 0.000 0.000 0.012 0.000 0.012 0.086
round(summary(fit)$coeff,2)
##     (Intercept) LOPMAR LFIXCHAR LGEARRAT  LTDCAP LLEVER LCASHLTD LACIDRAT
## AA        11.74   4.43     0.50    28.43  -72.37  -8.33    -3.24     3.12
## AAA      -19.71  20.69    -2.52    58.38 -112.80  -9.96   -14.95    11.64
## B        -58.42   1.31     3.38    70.70   12.38 -49.80   -13.03     2.93
## BA         7.22   3.54     5.18   -73.30    3.13  42.93   -17.17     4.97
## BAA       79.80 -14.60    -2.05   -80.81  -29.70  44.10     9.37   -21.53
## C         25.76  -3.30    -1.90   -19.46    6.77  16.15    -1.21    -0.06
##     LCURRAT LRECTURN LASSLTD
## AA    -1.10     5.11   19.50
## AAA    1.72    13.60   67.15
## B     -8.19    -3.15   72.04
## BA    -1.76     1.38  -54.10
## BAA   21.40   -15.07  -93.32
## C     -2.91    -4.75  -28.48
summary(fit)$standard.errors
##     (Intercept)   LOPMAR LFIXCHAR LGEARRAT    LTDCAP   LLEVER LCASHLTD LACIDRAT
## AA     31.83374 3.235624 1.323922 30.70983  60.87541 16.22329 4.175166 3.910530
## AAA    19.61618 7.186434 2.464353 36.27346  40.79986 17.85427 6.171757 5.089015
## B      51.63854 3.852885 2.453736 58.82617 114.94067 27.72559 6.484654 4.880095
## BA     21.06603 2.908066 1.804241 43.12921  36.15916 22.68562 5.246616 3.842039
## BAA    55.45235 6.989684 1.861628 45.34033 105.38147 22.09295 7.422105 9.625395
## C     191.42933 3.778619 3.547979 96.62102 373.10166 26.27910 6.503547 4.483231
##      LCURRAT LRECTURN  LASSLTD
## AA  4.100977 3.227411 27.50994
## AAA 5.007507 5.064849 34.75602
## B   6.129798 4.273424 42.79773
## BA  4.496332 3.263992 37.75283
## BAA 8.585059 6.828358 42.01908
## C   5.203328 4.079773 42.89455

Ordered logit

Y<-read.csv('BondRating.csv')
names<-as.matrix(unique(Y$RATING))

X<-Y[,-1]

fit <- polr(ordered(CODERTG) ~ . , data=X)
fit # show results
## Call:
## polr(formula = ordered(CODERTG) ~ ., data = X)
## 
## Coefficients:
##      LOPMAR    LFIXCHAR    LGEARRAT      LTDCAP      LLEVER    LCASHLTD 
##  -1.7334093   0.3323669 -12.8942367  39.0702649   2.4168597  -0.9868148 
##    LACIDRAT     LCURRAT    LRECTURN     LASSLTD 
##  -1.5229496  -0.3702369  -2.0676946  -7.6601086 
## 
## Intercepts:
##       1|2       2|3       3|4       4|5       5|6       6|7 
##  8.469632  9.911288 10.845631 11.827710 13.229760 14.874021 
## 
## Residual Deviance: 254.7475 
## AIC: 286.7475
t(round(fit$coeff,2))
##      LOPMAR LFIXCHAR LGEARRAT LTDCAP LLEVER LCASHLTD LACIDRAT LCURRAT LRECTURN
## [1,]  -1.73     0.33   -12.89  39.07   2.42    -0.99    -1.52   -0.37    -2.07
##      LASSLTD
## [1,]   -7.66
pred<-fitted.values(fit)

predclass<- apply(pred, 1, order)[7,]


summary(fit)
## 
## Re-fitting to get Hessian
## Call:
## polr(formula = ordered(CODERTG) ~ ., data = X)
## 
## Coefficients:
##             Value Std. Error t value
## LOPMAR    -1.7334     0.8072 -2.1474
## LFIXCHAR   0.3324     0.5274  0.6302
## LGEARRAT -12.8942     8.7291 -1.4772
## LTDCAP    39.0703    10.7471  3.6354
## LLEVER     2.4169     4.6371  0.5212
## LCASHLTD  -0.9868     1.2307 -0.8018
## LACIDRAT  -1.5229     1.1259 -1.3526
## LCURRAT   -0.3702     1.4168 -0.2613
## LRECTURN  -2.0677     0.7688 -2.6893
## LASSLTD   -7.6601     7.4245 -1.0317
## 
## Intercepts:
##     Value    Std. Error t value 
## 1|2   8.4696   4.7781     1.7726
## 2|3   9.9113   4.7765     2.0750
## 3|4  10.8456   4.7772     2.2703
## 4|5  11.8277   4.7965     2.4659
## 5|6  13.2298   4.8861     2.7077
## 6|7  14.8740   4.9960     2.9772
## 
## Residual Deviance: 254.7475 
## AIC: 286.7475
# confusion Matrix on development sample

ct <- table((X$CODERTG), predclass)



diag(prop.table(ct, 1))
##          1          2          3          4          5          6          7 
## 0.44444444 0.53846154 0.00000000 0.07692308 0.46153846 0.36363636 0.40000000
# total percent correct
sum(diag(prop.table(ct)))
## [1] 0.3209877

A check on how exactly prediction is computed in R

xx<-fitted.values(fit)[1,]

xi<-X[1,2:11]
xi
##   LOPMAR LFIXCHAR LGEARRAT LTDCAP LLEVER LCASHLTD LACIDRAT LCURRAT LRECTURN
## 1 -1.663    0.749   -0.491  0.378   0.16   -1.225    0.433    1.12    1.629
##   LASSLTD
## 1   1.277
ee1<- 8.469632-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee2<- 9.911288-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee3<- 10.8456-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee4<- 11.8277-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee5<- 13.2298-sum(as.numeric(xi)* as.numeric(fit$coeff))
ee6<- 14.8740-sum(as.numeric(xi)* as.numeric(fit$coeff))


cum_p<-c( 0, exp(ee1)/(1+exp(ee1)), exp(ee2)/(1+exp(ee2)), exp(ee3)/(1+exp(ee3)), exp(ee4)/(1+exp(ee4)), exp(ee5)/(1+exp(ee5)), exp(ee6)/(1+exp(ee6)),1)
cum_p[-1]-cum_p[-8]
## [1] 0.04177394 0.11385008 0.16370808 0.23674526 0.27972991 0.12763279 0.03655994
pred[1,]
##          1          2          3          4          5          6          7 
## 0.04177396 0.11385000 0.16371493 0.23674086 0.27972202 0.12763904 0.03655919