consistent matched pairs in R

2019-07-10 04:02发布

问题:

So using the Matching Package (Link to package here)

We can work through a modified GenMatch example.

library(Matching)
data(lalonde)

#introduce an id vaiable
lalonde$ID <- 1:length(lalonde$age)

X = cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp, 
          lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75, 
          lalonde$re75, lalonde$re74)

BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black, 
                    lalonde$hisp, lalonde$married, lalonde$nodegr, 
                    lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74, 
                    I(lalonde$re74*lalonde$re75))

genout <- GenMatch(Tr=lalonde$treat, X=X, BalanceMatrix=BalanceMat, estimand="ATE", 
                   pop.size=16, max.generations=10, wait.generations=1)

mout <- Match(Y=NULL, Tr=lalonde$treat, X=X,
              Weight.matrix=genout,
              replace=TRUE, ties=FALSE)

 

# here we set ties FALSE so we only have 1-1 Matching
summary(mout)

#now lets create our "Matched dataset"
treated <- lalonde[mout$index.treated,]
# and introduce an indetity variable for each pair
treated$Pair_ID <- treated$ID

non.treated <- lalonde[mout$index.control,]
non.treated$Pair_ID <- treated$ID

matched.data <- rbind(treated, non.treated)
matched.data <- matched.data[order(matched.data$Pair_ID),]

#this outputs which of the non-treated ID was paired with the first person
matched.data$ID[matched.data$Pair_ID==1 & matched.data$treat==0]

We see that for the data, the ID=1 is matched with ID=193

Now lets introduce some randomisation into the order of the data and see if we get the same pairs

n <- 500
P1 <- rep(NA, n)
P2 <- rep(NA, n)
P3 <- rep(NA, n)
P4 <- rep(NA, n)
P5 <- rep(NA, n)
P6 <- rep(NA, n)
P7 <- rep(NA, n)

for (i in 1:n) {
  lalonde <- lalonde[sample(1:nrow(lalonde)), ] # randomise order
  genout <- GenMatch(Tr=lalonde$treat, X=X, BalanceMatrix=BalanceMat, estimand="ATE", 
                     pop.size=16, max.generations=10, wait.generations=1)
  mout <- Match(Y=NULL, Tr=lalonde$treat, X=X,
                Weight.matrix=genout,
                replace=TRUE, ties=FALSE)
  summary(mout)
  treated <- lalonde[mout$index.treated,]
  treated$Pair_ID <- treated$ID
  non.treated <- lalonde[mout$index.control,]
  non.treated$Pair_ID <- treated$ID
  matched.data <- rbind(treated, non.treated)
  matched.data <- matched.data[order(matched.data$Pair_ID),]
  P1[i] <- matched.data$ID[matched.data$Pair_ID==1 & matched.data$treat==0]
  P2[i] <- matched.data$ID[matched.data$Pair_ID==2 & matched.data$treat==0]
  P3[i] <- matched.data$ID[matched.data$Pair_ID==3 & matched.data$treat==0]
  P4[i] <- matched.data$ID[matched.data$Pair_ID==4 & matched.data$treat==0]
  P5[i] <- matched.data$ID[matched.data$Pair_ID==5 & matched.data$treat==0]
  P6[i] <- matched.data$ID[matched.data$Pair_ID==6 & matched.data$treat==0]
  P7[i] <- matched.data$ID[matched.data$Pair_ID==7 & matched.data$treat==0]
}

So the loop will match the pairs 500 times and P1 will save the treat==0 case each time.

We then look at the which P1 appears the most, by:

plot(1:n, P1, main="P1")

OR

summary(as.factor(P1))

We see that no one treat==0 case is commonly paired. I would expect there to be a case (possibly =193??) that is commonly paired that does not depend on the order of the data. Therefore I think my loop is wrong. Can anybody point out where? Or when they run a loop, they find, independent of the order of the data, that similar cases are paired??

回答1:

The problem is that you randomise the order of lalonde, but your input to GenMatch and Match are X and BalanceMat which still have the original order. When you then build your matched.data at the end, you are subsetting using indices which don't tie into lalonde any more. Try again but including the assignment of X and BalanceMat in your loop.

i.e.

X = cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp, 
          lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75, 
          lalonde$re75, lalonde$re74)

BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black, 
                    lalonde$hisp, lalonde$married, lalonde$nodegr, 
                    lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74, 
                    I(lalonde$re74*lalonde$re75))