R distribute weights over a vector

2019-07-09 01:42发布

问题:

Suppose I have a vector in R

 0    1    0    0    1    0    0    0    0     1     0

There can be no more than 6 "1"s anywhere in the vector. All other elements are 0.

I'm trying to get all possible values where I distribute "1" across the 1 positions where each value has to be <= 0.5.

So for instance:

0    .2    0    0    .3    0    0    0    0     .5     0 . <- OK

0    .35    0    0    .4    0    0    0    0     .25     0 <- OK

However

0    .2   0    0    .2    0    0    0    0     .6     0  <- not ok

The increments can go up by 0.05.

Thus in a vector with 3 "1"'s there are at most 20^3 combinations many of which will be bad as they will sum to greater than 1 or have values >0.5. Is there a faster way than to brute force this?

Edit: I realized that I can come up with all possible weights quickly using:

temp <- expand.grid(replicate(sum(x),seq(0.05,.5,0.05), simplify=FALSE))

where x is my vector.

So now for each one of those in temp I want to put in positions where the 1's are in

 0    1    0    0    1    0    0    0    0     1     0

回答1:

Edit: As @www points out in the comments, you will miss some combinations/permutations if you rely on floating point arithmetic. To remedy this, we need to work with integer precision (i.e. instead of seq(0, 0.5, 0.05) we need seq(0L, 50L, 5L)) and divide our results by 100.

I authored the package RcppAlgos that is meant precisely for problems such as these:

library(RcppAlgos)
myCombs <- comboGeneral(seq(0L,50L,5L), 6, TRUE, 
                        constraintFun = "sum", 
                        comparisonFun = "==", 
                        limitConstraints = 100L) / 100
head(myCombs, n = 10)
      [,1] [,2] [,3] [,4] [,5] [,6]
 [1,]    0    0    0 0.00 0.50 0.50
 [2,]    0    0    0 0.05 0.45 0.50
 [3,]    0    0    0 0.10 0.40 0.50
 [4,]    0    0    0 0.10 0.45 0.45
 [5,]    0    0    0 0.15 0.35 0.50
 [6,]    0    0    0 0.15 0.40 0.45
 [7,]    0    0    0 0.20 0.30 0.50
 [8,]    0    0    0 0.20 0.35 0.45
 [9,]    0    0    0 0.20 0.40 0.40
[10,]    0    0    0 0.25 0.25 0.50

tail(myCombs, n = 10)
       [,1] [,2] [,3] [,4] [,5] [,6]
[190,] 0.10 0.10 0.15 0.15 0.15 0.35
[191,] 0.10 0.10 0.15 0.15 0.20 0.30
[192,] 0.10 0.10 0.15 0.15 0.25 0.25
[193,] 0.10 0.10 0.15 0.20 0.20 0.25
[194,] 0.10 0.10 0.20 0.20 0.20 0.20
[195,] 0.10 0.15 0.15 0.15 0.15 0.30
[196,] 0.10 0.15 0.15 0.15 0.20 0.25
[197,] 0.10 0.15 0.15 0.20 0.20 0.20
[198,] 0.15 0.15 0.15 0.15 0.15 0.25
[199,] 0.15 0.15 0.15 0.15 0.20 0.20

If you are interested in permutations, no problem:

myPerms <- permuteGeneral(seq(0L,50L,5L), 6, TRUE, 
                          constraintFun = "sum", 
                          comparisonFun = "==", 
                          limitConstraints = 100L) / 100

head(myPerms, n = 10)
      [,1] [,2] [,3] [,4] [,5] [,6]
 [1,]    0  0.0  0.0  0.0  0.5  0.5
 [2,]    0  0.0  0.0  0.5  0.0  0.5
 [3,]    0  0.0  0.0  0.5  0.5  0.0
 [4,]    0  0.0  0.5  0.0  0.0  0.5
 [5,]    0  0.0  0.5  0.0  0.5  0.0
 [6,]    0  0.0  0.5  0.5  0.0  0.0
 [7,]    0  0.5  0.0  0.0  0.0  0.5
 [8,]    0  0.5  0.0  0.0  0.5  0.0
 [9,]    0  0.5  0.0  0.5  0.0  0.0
[10,]    0  0.5  0.5  0.0  0.0  0.0

tail(myPerms, n = 10)
         [,1] [,2] [,3] [,4] [,5] [,6]
[41109,] 0.15 0.15 0.20 0.20 0.15 0.15
[41110,] 0.15 0.20 0.15 0.15 0.15 0.20
[41111,] 0.15 0.20 0.15 0.15 0.20 0.15
[41112,] 0.15 0.20 0.15 0.20 0.15 0.15
[41113,] 0.15 0.20 0.20 0.15 0.15 0.15
[41114,] 0.20 0.15 0.15 0.15 0.15 0.20
[41115,] 0.20 0.15 0.15 0.15 0.20 0.15
[41116,] 0.20 0.15 0.15 0.20 0.15 0.15
[41117,] 0.20 0.15 0.20 0.15 0.15 0.15
[41118,] 0.20 0.20 0.15 0.15 0.15 0.15

Result is immediate:

system.time(permuteGeneral(seq(0L,50L,5L), 6, TRUE, 
                           constraintFun = "sum", 
                           comparisonFun = "==", 
                           limitConstraints = 100L) / 100)
 user  system elapsed 
0.005   0.001   0.006


Quick Thoughts
One may be tempted to attack this problem as an additive integer partition problem. There is a mapping from seq(0, 0.5, 0.05) to 0:11 as well as a mapping from seq(0, 1, 0.05) to 0:20. The latter may not be obvious as to why it is helpful but indeed it is. There is a very nice package called partitions that comes equipped with a function for generating restricted partitions (that is, partitions of a given length).

library(partitions)
myParts <- t(as.matrix(restrictedparts(20, 6))) / 20

head(myParts)
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1.00 0.00    0    0    0    0
[2,] 0.95 0.05    0    0    0    0
[3,] 0.90 0.10    0    0    0    0
[4,] 0.85 0.15    0    0    0    0
[5,] 0.80 0.20    0    0    0    0
[6,] 0.75 0.25    0    0    0    0

As you can see, we have already violated are requirement of having numbers greater than 0.5. So we have to do a little extra work to get our final result:

myMax <- apply(myParts, 1, max)
myFinalParts <- myParts[-which(myMax > 0.5), ]

head(myFinalParts)
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0.50 0.50 0.00    0    0    0
[2,] 0.50 0.45 0.05    0    0    0
[3,] 0.50 0.40 0.10    0    0    0
[4,] 0.45 0.45 0.10    0    0    0
[5,] 0.50 0.35 0.15    0    0    0
[6,] 0.45 0.40 0.15    0    0    0

tail(myFinalParts, n = 10)
       [,1] [,2] [,3] [,4] [,5] [,6]
[190,] 0.35 0.15 0.15 0.15 0.10 0.10
[191,] 0.30 0.20 0.15 0.15 0.10 0.10
[192,] 0.25 0.25 0.15 0.15 0.10 0.10
[193,] 0.25 0.20 0.20 0.15 0.10 0.10
[194,] 0.20 0.20 0.20 0.20 0.10 0.10
[195,] 0.30 0.15 0.15 0.15 0.15 0.10
[196,] 0.25 0.20 0.15 0.15 0.15 0.10
[197,] 0.20 0.20 0.20 0.15 0.15 0.10
[198,] 0.25 0.15 0.15 0.15 0.15 0.15
[199,] 0.20 0.20 0.15 0.15 0.15 0.15

As you can see, we have the exact same solution above (see myCombs) only the columns are in a different order.

all.equal(myCombs, myFinalParts[,6:1])
[1] TRUE

For the permutation part, these are actually referred to as restricted integer compositions. We can call partitions::compositions and proceed similarly to the above where we will need to weed out those rows that break our rule (i.e. throw out rows that contain a maximum value greater than 0.5). It is possible to obtain the desired results utilizing partitions, there are just a few extra steps involved.

myComps <- t(as.matrix(compositions(20, 6))) / 20
myMax <- apply(myComps, 1, max)
temp <- myComps[-which(myMax > 0.5), ]
myFinalComps <- temp[do.call(order, as.data.frame(temp)), ]
all.equal(myPerms[do.call(order, as.data.frame(myPerms)), ], myFinalComps)
[1] TRUE


回答2:

Here is one possible option. dat5 is the final output.

# Create all possible combination from 1 to 19
dat1 <- expand.grid(L1 = 1:19, 
                    L2 = 1:19,
                    L3 = 1:19)

# Filter for the rows with sum = 20
dat2 <- dat1[rowSums(dat1) == 20L, ]

# Filter for the rows with no any numbers larger than 10
dat3 <- dat2[rowSums(dat2 > 10) == 0L, ]

# Convert the values by multiplied 0.05
dat4 <- dat3 * 0.05

# Convert the data frame to a list of vectors
dat4$ID <- 1:nrow(dat4)

dat5 <- lapply(split(dat4, f = dat4$ID), function(x){
  c(0, x$L1, 0, 0, x$L2, 0, 0, 0, 0, x$L3, 0)
})


回答3:

I do believe we need to only replace the 1's in the vector given. In that case, the zeros remain the same:

   s = c(0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0)
   m = expand.grid(replicate(sum(s==1),seq(0,0.5,0.05),F))
    indx = replace(replace(s,s==1,1:ncol(m)),s==0,ncol(m)+1)

    dat = unname(cbind(m[rowSums(m)==1,],0)[indx])
    head(dat)

121 0 0.50 0 0 0.50 0 0 0 0 0.00 0
231 0 0.50 0 0 0.45 0 0 0 0 0.05 0
241 0 0.45 0 0 0.50 0 0 0 0 0.05 0
341 0 0.50 0 0 0.40 0 0 0 0 0.10 0
351 0 0.45 0 0 0.45 0 0 0 0 0.10 0
361 0 0.40 0 0 0.50 0 0 0 0 0.10 0
 tail(dat)

1271 0 0.25 0 0 0.25 0 0 0 0 0.5 0
1281 0 0.20 0 0 0.30 0 0 0 0 0.5 0
1291 0 0.15 0 0 0.35 0 0 0 0 0.5 0
1301 0 0.10 0 0 0.40 0 0 0 0 0.5 0
1311 0 0.05 0 0 0.45 0 0 0 0 0.5 0
1321 0 0.00 0 0 0.50 0 0 0 0 0.5 0