marginal effects of mlogit in R

2019-02-15 17:42发布

问题:

I am new to R, and I don't understand yet completely the logic of its calculations...

I cannot overcome my problem with the help from previous posts either.

I have a data set of about 600 observations for 11 variables. I have successfully run the multinomial model on it, however I cannot achieve the marginal effects because my mean() command is getting NAs:

The data set:

> head(data,n=50)
    ID time      CHINN      DEBT ERA         INFL     MONEY  OPENNESS    RESERVES      RGDP       RSVS
1  POL 1993 -1.8639720        NA   0   32.8815343  33.47353  41.05223  4276726138 100.00000  4.2767261
2  POL 1994 -0.8081098        NA   0   30.7586977  31.98628  41.26984  6023061884 105.13912  6.0230619
3  POL 1995 -0.1129656        NA   0   26.0622777  31.69277  44.24652 14957024390 112.15091 14.9570244
4  POL 1996 -1.1688280        NA   0   19.1794515  33.19738  46.04792 18018686050 119.00579 18.0186860
5  POL 1997 -1.1688280        NA   0   14.8181395  35.11881  50.74269 20669498248 127.43530 20.6694982
6  POL 1998 -1.1688280        NA   4   11.1353669  37.70208  56.79412 28276350644 133.65597 28.2763506
7  POL 1999 -1.1688280        NA   4    7.0830285  40.69941  54.29511 27314254555 139.74225 27.3142546
8  POL 2000 -1.1688280  32.96343   1   10.7321436  40.60334  60.67079 27469379821 145.66606 27.4693798
9  POL 2001 -1.1688280  41.37857   1    4.3657643  44.43624  57.77678 26563086922 147.25896 26.5630869
10 POL 2002  0.0644257        NA   1    0.4689145  41.98616  60.73279 29783861006 149.32464 29.7838610
11 POL 2003  0.0644257        NA   1   -0.3092132  42.42629  69.31849 33958963841 154.95895 33.9589638
12 POL 2004  0.0644257        NA   1    2.5056441  40.21144  77.33549 36772764782 162.98411 36.7727648
13 POL 2005  0.0644257        NA   1    1.4888388  43.50525  74.91127 42560657450 168.54956 42.5606575
14 POL 2006  0.0644257        NA   1    0.8029315  46.92580  82.51674 48473947849 178.83656 48.4739478
15 POL 2007  0.0644257        NA   1    0.7577241  47.82156  84.38853 65724834811 190.53326 65.7248348
16 POL 2008  0.0644257        NA   1    3.5756117  52.36352  83.76331 62183606786 199.83465 62.1836068
17 POL 2009  0.0644257        NA   1    2.6459580  53.71374  78.80573 79521598778 203.40385 79.5215988
18 POL 2010  0.0644257        NA   1    1.6733804  55.42878  85.68774 93472496388 211.50103 93.4724964
19 POL 2011  0.0644257        NA   1    3.0274845  57.87980  91.29239 97712443397 220.71189 97.7124434
20 BGR 1993         NA        NA   1   68.8945207  78.96788  84.04149  1052450357 100.00000  1.0524504
21 BGR 1994 -1.1688280        NA   1   93.5639075  79.70282  90.72013  1396927258 101.92207  1.3969273
22 BGR 1995 -1.1688280        NA   1   60.0454149  67.02978 101.82717  1635188166 104.48391  1.6351882
23 BGR 1996 -0.9050840        NA   1  120.9697303  83.30492 116.19712   864262494  95.38304  0.8642625
24 BGR 1997 -0.9050840        NA   5 1058.1103740  35.17444 112.09386  2485359931  93.68869  2.4853599
25 BGR 1998 -0.9050840        NA   6   18.0824730  28.90920 117.17808  3056954172  97.96299  3.0569542
26 BGR 1999 -0.9050840        NA   6    2.3810375  30.99876 116.55936  3264673405  99.87455  3.2646734
27 BGR 2000 -0.9050840        NA   6   10.9885883  35.97481 106.26211  3507199103 105.59192  3.5071991
28 BGR 2001 -1.1688280        NA   6    6.2354669  40.92721 106.86605  3646131855 109.94781  3.6461319
29 BGR 2002 -1.1688280        NA   6    4.3788710  41.75304 102.98033  4846429828 114.94803  4.8464298
30 BGR 2003 -0.9050840        NA   6    1.0599753  46.25984 107.38463  6825720096 120.93287  6.8257201
31 BGR 2004 -0.6413398        NA   6    5.2752300  51.21173 115.32294  9337108247 128.76534  9.3371082
32 BGR 2005 -0.3775955        NA   6    4.4206316  55.53535  96.16404  8697081229 136.82237  8.6970812
33 BGR 2006  2.1752650        NA   6    6.9496009  61.91483 140.00543 11756001804 145.51154 11.7560018
34 BGR 2007  2.4390090        NA   6    6.7721512  69.93833 138.64999 17544560083 154.43412 17.5445601
35 BGR 2008  2.4390090        NA   6   11.5750056  66.12314 136.94939 17930378450 163.65291 17.9303784
36 BGR 2009  2.4390090  13.82140   6    1.5731668  69.86327 103.84837 18522120691 154.75483 18.5221207
37 BGR 2010  2.4390090  14.93887   6    1.4049372  71.96152 116.71516 17223201500 155.40066 17.2232015
38 BGR 2011  2.4390090  15.44620   6    2.9890214  75.58550 132.27417 17215734344 158.00846 17.2157343
39 CYP 1993 -0.1129656        NA   5    0.8698960 129.36758  95.41637  1276350873 100.00000  1.2763509
40 CYP 1994 -0.1129656        NA   5    2.2051585 131.48065  95.77615  1640273818 105.77324  1.6402738
41 CYP 1995 -0.1129656        NA   5    0.6063991 128.64850 100.10371  1294935266 111.96553  1.2949353
42 CYP 1996 -1.1688280        NA   5    2.3411552 136.94871 104.44163  1704484151 114.02390  1.7044842
43 CYP 1997 -1.1688280        NA   5    3.3418671 145.38644 105.39959  1525648570 116.75559  1.5256486
44 CYP 1998 -1.1688280 261.72872   4    1.6379206 147.00670 100.52824  1512684182 122.55893  1.5126842
45 CYP 1999 -1.1688280 155.75745   4    1.4380285 159.30150 101.75789  1967461070 128.47057  1.9674611
46 CYP 2000 -1.1688280 156.25023   4    4.8139861 161.29472 109.87129  1868515552 135.00970  1.8685156
47 CYP 2001 -1.1688280 160.87807   4    0.8515728 169.52762 109.90206  2396061109 140.39178  2.3960611
48 CYP 2002 -1.1688280 170.52011   4    1.3698956 175.62242 103.06500  3181358048 143.20035  3.1813580
49 CYP 2003 -0.1129656 179.11164   4    3.0419336 174.50803  95.17382  3450932640 145.59528  3.4509326
50 CYP 2004  1.3840320 180.22661   4    1.2153140 171.42676  98.02155  4113824175 151.47324  4.1138242

commands for transforming the data:

pdata<-plm.data(data,index=c("ID","time"))
mldata<-mlogit.data(pdata,choice="ERA",shape="wide")

command for the matrix of means values:

z<-with(mldata,data.frame(CHINN=mean(CHINN),DEBT=mean(DEBT),INFL=mean(INFL),MONEY=mean(MONEY),OPENNESS=mean(OPENNESS),RGDP=mean(RGDP),RSVS=mean(RSVS)))

The output I get:

  CHINN DEBT INFL MONEY OPENNESS     RGDP RSVS
1    NA   NA   NA    NA       NA 133.4633   NA

Could you advise on the reason of the NAs for this command?

I would understand if it would not give a correct output for DEBT which is mostly NAs, but why it is not calculating the mean for RSVS, CHINN and others?

Why does it calculate the mean for RGDP? Both variables has:

> class(mldata$CHINN)
[1] "numeric"
> class(mldata$RGDP)
[1] "numeric"

How to overcome this problem?

UPDATE:

Thanks to the David Arenburg's comment I managed to calculate the means:

> z
     CHINN     DEBT     INFL   MONEY OPENNESS     RGDP     RSVS
1 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734

However there is another error that occurs while calculating the marginal effects:

effects(mlogit.data2,covariate="CHINN",data=z)
Error in predict.mlogit(object, data) : 
  the number of rows of the data.frame should be a multiple of the number of alternatives

After copying the means 5 times to get a matrix zz of 6 rows for 6 alternatives:

> zz
     CHINN     DEBT     INFL   MONEY OPENNESS     RGDP     RSVS
1 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
2 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
3 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
4 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
5 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734
6 1.342326 59.85562 33.88494 58.2304 95.88219 133.4633 29.21734

The effects() command gives an output:

> effects(mlogit.data2,covariate=c("CHINN","DEBT","INFL","MONEY","OPENNESS","RGDP","RSVS"),data=zz)
            2             0             1             4             5             6 
-1.288185e-02 -1.933325e-02 -8.801676e-06  1.095252e-02  2.120814e-02  6.324152e-05 

However I cannot draw any conclusions on this output - I do not know to which variable I can assign those marginal effects.

When I try running one by one I get another error:

> effects(mlogit.data2,covariate=c("CHINN"),data=zz)
Error in if (rhs %in% c(1, 3)) { : argument is of length zero

The model command:

mlogit.data2<-mlogit(ERA~1|CHINN+INFL+MONEY+OPENNESS+RGDP+RSVS+DEBT,data=mldata,reflevel="4")

The packages I am using:

library(Formula)
library(miscTools)
library(lattice)
library(zoo)
library(sandwich)
library(maxLik)
library(lmtest)
library(statmod)
library(mlogit)
library(plm)

Thank you in advance, Zyta

回答1:

Ok, seems like I have kind of solution with the help of:

http://www.talkstats.com/showthread.php/44314-calculate-marginal-effects-using-mlogit-package

and

How can I view the source code for a function?

apparently it was enough to see for the:

> getAnywhere(effects.mlogit)
A single object matching ‘effects.mlogit’ was found
It was found in the following places
  registered S3 method for effects from namespace mlogit
  namespace:mlogit
with value

function (object, covariate = NULL, type = c("aa", "ar", "rr", 
    "ra"), data = NULL, ...) 
{
    type <- match.arg(type)
    if (is.null(data)) {
        P <- predict(object, returnData = TRUE)
        data <- attr(P, "data")
        attr(P, "data") <- NULL
    }
    else P <- predict(object, data)
    newdata <- data
    J <- length(P)
    alt.levels <- names(P)
    pVar <- substr(type, 1, 1)
    xVar <- substr(type, 2, 2)
    cov.list <- lapply(attr(formula(object), "rhs"), as.character)
    rhs <- sapply(cov.list, function(x) length(na.omit(match(x, 
        covariate))) > 0)
    rhs <- (1:length(cov.list))[rhs]
    eps <- 1e-05
    if (rhs %in% c(1, 3)) {
        if (rhs == 3) {
            theCoef <- paste(alt.levels, covariate, sep = ":")
            theCoef <- coef(object)[theCoef]
        }
        else theCoef <- coef(object)[covariate]
        me <- c()
        for (l in 1:J) {
            newdata[l, covariate] <- data[l, covariate] + eps
            newP <- predict(object, newdata)
            me <- rbind(me, (newP - P)/eps)
            newdata <- data
        }
        if (pVar == "r") 
            me <- t(t(me)/P)
        if (xVar == "r") 
            me <- me * matrix(rep(data[[covariate]], J), J)
        dimnames(me) <- list(alt.levels, alt.levels)
    }
    if (rhs == 2) {
        newdata[, covariate] <- data[, covariate] + eps
        newP <- predict(object, newdata)
        me <- (newP - P)/eps
        if (pVar == "r") 
            me <- me/P
        if (xVar == "r") 
            me <- me * data[[covariate]]
        names(me) <- alt.levels
    }
    me
}
<environment: namespace:mlogit>

Then to copy the function and modyfy its 16 line:

myeffects<-function (object, covariate = NULL, type = c("aa", "ar", "rr", 
                                             "ra"), data = NULL, ...) 
{
    type <- match.arg(type)
    if (is.null(data)) {
        P <- predict(object, returnData = TRUE)
        data <- attr(P, "data")
        attr(P, "data") <- NULL
    }
    else P <- predict(object, data)
    newdata <- data
    J <- length(P)
    alt.levels <- names(P)
    pVar <- substr(type, 1, 1)
    xVar <- substr(type, 2, 2)
    cov.list <- strsplit(as.character(attr(formula(object), "rhs")), " + ", fixed = TRUE)
    rhs <- sapply(cov.list, function(x) length(na.omit(match(x, 
                                                             covariate))) > 0)
    rhs <- (1:length(cov.list))[rhs]
    eps <- 1e-05
    if (rhs %in% c(1, 3)) {
        if (rhs == 3) {
            theCoef <- paste(alt.levels, covariate, sep = ":")
            theCoef <- coef(object)[theCoef]
        }
        else theCoef <- coef(object)[covariate]
        me <- c()
        for (l in 1:J) {
            newdata[l, covariate] <- data[l, covariate] + eps
            newP <- predict(object, newdata)
            me <- rbind(me, (newP - P)/eps)
            newdata <- data
        }
        if (pVar == "r") 
            me <- t(t(me)/P)
        if (xVar == "r") 
            me <- me * matrix(rep(data[[covariate]], J), J)
        dimnames(me) <- list(alt.levels, alt.levels)
    }
    if (rhs == 2) {
        newdata[, covariate] <- data[, covariate] + eps
        newP <- predict(object, newdata)
        me <- (newP - P)/eps
        if (pVar == "r") 
            me <- me/P
        if (xVar == "r") 
            me <- me * data[[covariate]]
        names(me) <- alt.levels
    }
    me
}

Now the results are as of:

 > myeffects(mlogit.data2,covariate="RSVS",data=zz)
            2             0             1             4             5             6 
 3.612318e-03  5.368693e-04 -4.903995e-08 -5.382731e-03  1.238768e-03 -5.175053e-06


回答2:

You could use colMeans

  op <- options(scipen= 100, digits=2)
  colMeans(mldata[,3:11], na.rm=TRUE)
   #  CHINN           DEBT            ERA           INFL          MONEY 
   #  -0.27         115.25           0.20          33.66          74.66 
   #OPENNESS       RESERVES           RGDP           RSVS 
   #  92.06 18809465124.14         136.56          18.81 
  options(op)

or summarise_each from dplyr

library(dplyr)
mldata %>%
summarise_each(funs(round(mean(., na.rm=TRUE),2)), CHINN:RSVS)
# CHINN   DEBT ERA  INFL MONEY OPENNESS    RESERVES   RGDP  RSVS
#1 -0.27 115.25 0.2 33.66 74.66    92.06 18809465124 136.56 18.81