可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
I have N=6
elements and q=3
elements symboled as 0
,1
,2
.
I want to create all the vectors of N=6
elements with 2
elements equal to 0
, 2
elements equal to 1
and 2
elements equal to 2
in all the possible positions.
The number of these vectors are equal to combn(6,2)*combn(4,2)*combn(2,2)=90
.
Here is a code that construct these 90
vectors in the matrix F
:
N=6
x<-c(1:N)
#########################################
A<-combn(x,2)
B<-matrix(0,ncol(A),length(x))
for( i in 1:ncol(A) )
{
y<-rep(0,N)
y[A[1:nrow(A),i]]<-1
B[i,]<-y
}
######################################
E<-matrix(0,nrow(B),length(x)-nrow(A))
for( i in 1:nrow(B) )
{
q=0
for( j in 1:ncol(B) )
{
if( B[i,j]!=1 )
{
q=q+1
E[i,q]<-j
}}}
########################################
ASD<-combn(E[i,],2)
F<-matrix(0,nrow(B)*ncol(ASD),length(x))
q=0
for( i in 1:nrow(B) )
{
ASD<-combn(E[i,],2)
for( j in 1:ncol(ASD) )
{
B[i,ASD[1:nrow(ASD),j]]<-2
q=q+1
F[q,]<-B[i,]
B[i,ASD[1:nrow(ASD),j]]<-0
}}
Is there any other less complicated way to do it?
回答1:
You can use the iterpc
package:
> library(iterpc)
> I <- iterpc(c(2,2,2), labels=c(0,1,2), ordered=TRUE)
> getall(I)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 1 1 2 2
[2,] 0 0 1 2 1 2
[3,] 0 0 1 2 2 1
[4,] 0 0 2 1 1 2
[5,] 0 0 2 1 2 1
[6,] 0 0 2 2 1 1
[7,] 0 1 0 1 2 2
[8,] 0 1 0 2 1 2
[9,] 0 1 0 2 2 1
[10,] 0 1 1 0 2 2
[11,] 0 1 1 2 0 2
[12,] 0 1 1 2 2 0
[13,] 0 1 2 0 1 2
[14,] 0 1 2 0 2 1
[15,] 0 1 2 1 0 2
[16,] 0 1 2 1 2 0
[17,] 0 1 2 2 0 1
[18,] 0 1 2 2 1 0
[19,] 0 2 0 1 1 2
[20,] 0 2 0 1 2 1
[21,] 0 2 0 2 1 1
[22,] 0 2 1 0 1 2
[23,] 0 2 1 0 2 1
[24,] 0 2 1 1 0 2
[25,] 0 2 1 1 2 0
[26,] 0 2 1 2 0 1
[27,] 0 2 1 2 1 0
[28,] 0 2 2 0 1 1
[29,] 0 2 2 1 0 1
[30,] 0 2 2 1 1 0
[31,] 1 0 0 1 2 2
[32,] 1 0 0 2 1 2
[33,] 1 0 0 2 2 1
[34,] 1 0 1 0 2 2
[35,] 1 0 1 2 0 2
[36,] 1 0 1 2 2 0
[37,] 1 0 2 0 1 2
[38,] 1 0 2 0 2 1
[39,] 1 0 2 1 0 2
[40,] 1 0 2 1 2 0
[41,] 1 0 2 2 0 1
[42,] 1 0 2 2 1 0
[43,] 1 1 0 0 2 2
[44,] 1 1 0 2 0 2
[45,] 1 1 0 2 2 0
[46,] 1 1 2 0 0 2
[47,] 1 1 2 0 2 0
[48,] 1 1 2 2 0 0
[49,] 1 2 0 0 1 2
[50,] 1 2 0 0 2 1
[51,] 1 2 0 1 0 2
[52,] 1 2 0 1 2 0
[53,] 1 2 0 2 0 1
[54,] 1 2 0 2 1 0
[55,] 1 2 1 0 0 2
[56,] 1 2 1 0 2 0
[57,] 1 2 1 2 0 0
[58,] 1 2 2 0 0 1
[59,] 1 2 2 0 1 0
[60,] 1 2 2 1 0 0
[61,] 2 0 0 1 1 2
[62,] 2 0 0 1 2 1
[63,] 2 0 0 2 1 1
[64,] 2 0 1 0 1 2
[65,] 2 0 1 0 2 1
[66,] 2 0 1 1 0 2
[67,] 2 0 1 1 2 0
[68,] 2 0 1 2 0 1
[69,] 2 0 1 2 1 0
[70,] 2 0 2 0 1 1
[71,] 2 0 2 1 0 1
[72,] 2 0 2 1 1 0
[73,] 2 1 0 0 1 2
[74,] 2 1 0 0 2 1
[75,] 2 1 0 1 0 2
[76,] 2 1 0 1 2 0
[77,] 2 1 0 2 0 1
[78,] 2 1 0 2 1 0
[79,] 2 1 1 0 0 2
[80,] 2 1 1 0 2 0
[81,] 2 1 1 2 0 0
[82,] 2 1 2 0 0 1
[83,] 2 1 2 0 1 0
[84,] 2 1 2 1 0 0
[85,] 2 2 0 0 1 1
[86,] 2 2 0 1 0 1
[87,] 2 2 0 1 1 0
[88,] 2 2 1 0 0 1
[89,] 2 2 1 0 1 0
[90,] 2 2 1 1 0 0
EDIT 2018-04-28
iterpc
is now deprecated in favor of arrangements
.
回答2:
Here is a super fast one liner from the development version of the package RcppAlgos
.
devtools::install_github("jwood000/RcppAlgos")
library(RcppAlgos)
myPerms <– permuteGeneral(3,6,TRUE,"prod","==",36) - 1L
myPerms
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 1 1 2 2
[2,] 0 0 1 2 1 2
[3,] 0 0 1 2 2 1
[4,] 0 0 2 1 1 2
[5,] 0 0 2 1 2 1
[6,] 0 0 2 2 1 1
.
.
.
[,1] [,2] [,3] [,4] [,5] [,6]
[85,] 2 2 0 0 1 1
[86,] 2 2 0 1 0 1
[87,] 2 2 0 1 1 0
[88,] 2 2 1 0 0 1
[89,] 2 2 1 0 1 0
[90,] 2 2 1 1 0 0
Here are some benchmarks where rcppAlgo
, r2eOne
, r2eTwo
, and OPFun
are function wrappers of the code for each method.
microbenchmark(rcppAlgo(),r2eOne(),r2eTwo(),OPFun(N=6) unit = "relative")
Unit: relative
expr min lq mean median uq max neval
rcppAlgo() 1.00000 1.00000 1.00000 1.0000 1.00000 1.000000 100
r2eOne() 471.56007 473.26487 194.01669 267.9402 274.46604 8.373630 100
r2eTwo() 50.71091 48.84173 24.01617 27.8441 34.02326 2.044374 100
OPFun(N=6) 37.35899 24.38966 22.38029 19.7059 19.51935 31.18059 100
Explanation
Since the OP is looking for a particular combination of numbers with specific frequencies, we can make use of the Fundamental theorem of arithmetic, which states that every number can be written as the product of a unique combination of prime numbers. We are given the set 0, 1, 2
and adding 1 gives the set 1, 2, 3
. We do this to avoid getting many zeros when we take the product.
Now, we are tasked with finding all combinations such that each element occurs exactly twice. This means after we apply the product to our target combination we get 1*1*2*2*3*3 = 36
(N.B. 1
is not prime but can be ignored since 1*n = n for all n
). Now the problem is simple.
We simply find all combinations such that the product is equal to 36
and subsequently subtract 1
to get back to our original set of numbers and Voila!
General Solution
Below, we have a general solution that can be used to find all permutations of a given vector with repetition of each element a specific number of times.
library(RcppAlgos) ## for primeSieve and permuteGeneral
MakePerms <- function(v, numReps, myCap = NULL) {
m <- sum(numReps)
n <- length(v)
## Generate some primes using prime
## number theorem; fudging a bit to
## ensure we get n-1 prime numbers
myPs <- primeSieve(2*n*log(n))[1:(n-1)]
## Set up vector that will be tested
myV <- c(1L, myPs)
target <- prod(myV^numReps)
ps <- permuteGeneral(myV, m, TRUE, "prod", "==", target, myCap)
for (j in 1:n) {ps[ps == myV[j]] <- v[j]}
ps
}
It relies heavily on the uniqueness of the prime factorization per the Fundamental theorem of arithmetic and a little indexing (not quite as simple as the trivial case above, but still only 7 lines and still super fast).
We first create a vector of the first n-1
primes and tack on a 1
to complete myV
. We then raise each element of myV
to the desired number of repeats per element given by numReps
and take the product to obtain our target
value. Here are some examples:
v = c(10,13,267,1)
and numReps = c(3,1,2,5)
-->> myV = c(1,2,3,5)
-->> target = 1^3 * 2^1 * 3^2 * 5^5 = 56250
v = 0:5
and numReps = c(1,2,1,2,2,2)
-->> myV = c(1,2,3,5,7,11)
-->> target = 1^1 * 2^2 * 3^1 * 5^2 * 7^2 * 11^2 = 1778700
- OP Example:
v = c(0,1,2)
and numReps = c(2,2,2)
-->> myV = c(1,2,3)
-->> target = 1^2 * 2^2 * 3^2 = 36
After we find all permutations that have a product equal to the target
value, we simply map the contents of our original vector v
to the generated matrix using indexing.
For example, if you set N = 8
in the OP's example you get all permutations of c(0,1,2)
with 0
repeated exactly 4
times, and 1
and 2
repeated twice.
t1 <- OPFun(N=8)
t2 <- MakePerms(0:2, c(4,2,2))
all.equal(t1[do.call(order, as.data.frame(t1)), ],
t2[do.call(order, as.data.frame(t2)), ])
[1] TRUE
microbenchmark(fun2(8), MakePerms(0:2, c(4,2,2)), unit = "relative")
Unit: relative
expr min lq mean median uq max neval
OPFun(8) 23.25099 22.56178 18.64762 19.52436 18.37387 10.90934 100
MakePerms(0:2, c(4, 2, 2)) 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 100
It should be noted that the number of possible permutations grows rapidly, so attempts like so MakePerms(0:5, rep(2, 6))
will fail as the total number of permutations of of 0:5 12 times
is 12^6 = 2,985,984 > 2^31 - 1
(i.e. the maximal number of rows for a matrix in Rcpp
). However, we don't expect all of them to meet our criteria, so if we put in a cap, say 10^7
, we will have success. Observe:
a <- MakePerms(0:5, rep(2, 6), 10^7)
nrow(a)
7484400
set.seed(17)
a[sample(nrow(a), 10), ]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
[1,] 0 5 3 3 1 2 4 4 5 1 0 2
[2,] 5 4 2 1 1 0 3 4 5 2 3 0
[3,] 2 4 5 3 5 1 3 0 1 0 4 2
[4,] 4 3 3 1 2 5 0 5 4 1 0 2
[5,] 2 2 5 3 4 1 0 3 5 1 0 4
[6,] 3 1 1 5 0 3 2 0 2 4 4 5
[7,] 1 1 4 2 0 5 4 0 3 5 3 2
[8,] 1 0 4 2 4 2 5 1 3 0 5 3
[9,] 4 3 4 1 5 0 0 2 2 1 3 5
[10,] 1 0 5 3 2 0 1 4 3 4 2 5
The use of myCap
can greatly increase efficiency as well.
microbenchmark(withOutCap = MakePerms(0:5, c(1,2,1,2,1,2)),
withCap = MakePerms(0:5, c(1,2,1,2,1,2), 10^5),
times = 15)
Unit: milliseconds
expr min lq mean median uq max neval
withOutCap 219.64847 246.4718 275.04672 282.52829 299.33816 311.2031 15
withCap 22.56437 30.6904 33.30469 31.70443 37.50858 41.6095 15
identical(MakePerms(0:5, c(1,2,1,2,1,2)), MakePerms(0:5, c(1,2,1,2,1,2), 10^5))
[1] TRUE
iterpc
Solution
It seems as though the answers provided to this point are strictly academic as the answer provided by @StéphaneLaurent is far superior. It is super general, one line, and super fast!!
microbenchmark(iter = getall(iterpc(c(2,2,2), labels=c(0,1,2), ordered=TRUE)),
rcppAlg = MakePerms(0:2, c(2,2,2)))
Unit: microseconds
expr min lq mean median uq max neval
iter 428.885 453.2975 592.53164 540.154 683.9585 1165.772 100
rcppAlg 62.418 74.5205 93.44926 81.749 108.4660 216.454 100
The story changes as the number of permutations grows. Observe:
microbenchmark(iter = getall(iterpc(c(2,2,2,2), labels=c(0,1,2,3), ordered=TRUE)),
rcppAlg = MakePerms(0:3, c(2,2,2,2)),
rcppAlgCap = MakePerms(0:3, c(2,2,2,2), 5000))
Unit: microseconds
expr min lq mean median uq max neval
iter 877.246 1052.7060 1394.636 1150.0895 1265.088 8914.980 100
rcppAlg 964.446 1449.7115 2084.944 1787.9350 1906.242 10921.156 100
If you utilize myCap
, MakePerms
is a little faster. This doesn't really matter, because with the iterpc
solution, you don't even have to think about how many results you will get. Very nice!!
Update
The new version of RcppAlgos
(which I am the author of) has just been released on CRAN. There is an additional argument now for permuteGeneral
called freqs
which allows permutations of multisets, which is exactly what the OP is looking for.
microbenchmark(iter = getall(iterpc(c(2,2,2,2), labels=0:3, ordered=TRUE)),
newRcppAlgos = permuteGeneral(0:3, freqs = c(2,2,2,2)))
Unit: microseconds
expr min lq mean median uq max neval
iter 457.442 482.8365 609.98678 508.6150 572.581 4037.048 100
newRcppAlgos 33.159 43.3975 56.40026 48.5665 58.194 625.691 100
microbenchmark(iter = getall(iterpc(c(5,4,3,2), labels=0:3, ordered=TRUE)),
newRcppAlgos = permuteGeneral(0:3, freqs = c(5,4,3,2)))
Unit: milliseconds
expr min lq mean median uq max neval
iter 480.25976 552.54343 567.9155 565.23066 579.0258 751.8556 100
newRcppAlgos 83.41194 87.03957 104.6279 95.67596 107.3572 181.1119 100
identical(getall(iterpc(c(5,4,3,2), labels=0:3, ordered=TRUE)),
permuteGeneral(0:3, freqs = c(5,4,3,2)))
[1] TRUE
nrow(permuteGeneral(0:3, freqs = c(5,4,3,2)))
[1] 2522520
Update 2
As @StéphaneLaurent points out, the package arrangements
has been released as a replacement for iterpc
(see comments by @RandyLai). It is much more efficient and is capable of handling a broader set of combinatorial problems (e.g. partitions). Here are the benchmarks for the larger example:
microbenchmark(arrangements = permutations(x = 0:3, freq = c(5,4,3,2)),
RcppAlgos = permuteGeneral(0:3, freqs = c(5,4,3,2)))
Unit: milliseconds
expr min lq mean median uq max neval
arrangements 97.10078 98.67154 113.5953 100.56261 131.3244 163.8912 100
RcppAlgos 92.13122 93.84818 108.1845 95.72691 101.2647 165.7248 100
...Nearly identical results.
A huge benefit of arrangements
is the ability to get permutations one at a time (or in chunks) via getnext
. This allows the user to generate more than 2^31 - 1
results and offers more flexibility in general.
For more information regarding problems like this in R
, I wrote an extensive overview to the question : R: Permutations and combinations with/without replacement and for distinct/non-distinct items/multiset.
回答3:
Two methods suggested: an inefficient one and a more efficient but laborious one. (In this context, I equate "efficiency" to scaling, not to the amount of code to do it or execution time. That is, as long as you are only creating 90 rows, then you're fine. If this is a simplified problem and you really need to expand to a much larger matrix, then permutations
might exceed memory and/or R's capacity.)
Both solutions are a bit shorter than your code. The first is relatively clear to read and only 4 lines of code; the second is admittedly a bit obscure (and appears to go into index indirection "inception"), but is really still only 13 lines of needed code. It's possible the second could be reduced somewhat, but I ran out of "play" time :-)
Inefficient
One method is to create all permutations and filter out repeats. This works as long as your "N" doesn't get too big.
library(gtools)
v <- rep(0:2, 2)
p <- permutations(6, 6)
p[] <- v[p]
p <- p[!duplicated(p),]
head(p)
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 0 1 2 0 1 2
# [2,] 0 1 2 0 2 1
# [3,] 0 1 2 1 0 2
# [4,] 0 1 2 1 2 0
# [5,] 0 1 2 2 0 1
# [6,] 0 1 2 2 1 0
tail(p)
# [,1] [,2] [,3] [,4] [,5] [,6]
# [85,] 2 2 0 1 0 1
# [86,] 2 2 0 1 1 0
# [87,] 2 2 0 0 1 1
# [88,] 2 2 1 0 0 1
# [89,] 2 2 1 0 1 0
# [90,] 2 2 1 1 0 0
Verify each row has exactly two of each element:
all(apply(p, 1, table) == 2)
# [1] TRUE
Less-Inefficient
A less-inefficient (and therefore more effort) method: create a matrix of column indices, using the combn(6,2)
and combn(4,2)
, and then assign the "factors" appropriately. (It'll make more sense in a moment.)
(NB: I often think better about these problems in terms of the transposed matrix; you can easily do this un-transposed, just adjust the code to swap columns/rows.)
What we need is something like expand.grid
for two columns at a time. So we'll start with the smaller problems:
left2 <- t(combn(6, 2))
mid2 <- t(combn(4, 2))
left2
# [,1] [,2]
# [1,] 1 2
# [2,] 1 3
# [3,] 1 4
# [4,] 1 5
# [5,] 1 6
# [6,] 2 3
# [7,] 2 4
# [8,] 2 5
# [9,] 2 6
# [10,] 3 4
# [11,] 3 5
# [12,] 3 6
# [13,] 4 5
# [14,] 4 6
# [15,] 5 6
mid2
# [,1] [,2]
# [1,] 1 2
# [2,] 1 3
# [3,] 1 4
# [4,] 2 3
# [5,] 2 4
# [6,] 3 4
Now, the grid will expand on the row indices of these two matrices.
eg <- expand.grid(a = 1:15, b = 1:6)
head(eg)
# a b
# 1 1 1
# 2 2 1
# 3 3 1
# 4 4 1
# 5 5 1
# 6 6 1
inds <- cbind(left2[eg$a,], mid2[eg$b,])
head(inds)
# [,1] [,2] [,3] [,4]
# [1,] 1 2 1 2
# [2,] 1 3 1 2
# [3,] 1 4 1 2
# [4,] 1 5 1 2
# [5,] 1 6 1 2
# [6,] 2 3 1 2
inds[25,,drop=FALSE]
# [,1] [,2] [,3] [,4]
# [1,] 3 4 1 3
This means that, for row 25, we should replace columns 3 and 4 with the first factor (say 0
). Then, of the remaining columns (1,2,5,6), we should replace the 1st and 3rd columns to the second factor (say 1
). Said again, c(1,2,5,6)[c(1,3)]
equates to columns 1 and 5 being replace with the second value (1
). (The third value 2
will go in all remaining slots.)
So, to come up with the c(1,2,5,6)
from above, we can use setdiff(1:6,...)
:
afterleft2 <- t(apply(left2[eg$a,], 1, function(a) setdiff(1:6, a)))
head( afterleft2 )
# [,1] [,2] [,3] [,4]
# [1,] 3 4 5 6
# [2,] 2 4 5 6
# [3,] 2 3 5 6
# [4,] 2 3 4 6
# [5,] 2 3 4 5
# [6,] 1 4 5 6
afterleft2[25,,drop=FALSE]
# [,1] [,2] [,3] [,4]
# [1,] 1 2 5 6
So let's get to fixing the inds
third and fourth columns.
inds[,3] <- afterleft2[ cbind(1:90, mid2[eg$b,1]) ]
inds[,4] <- afterleft2[ cbind(1:90, mid2[eg$b,2]) ]
head(inds)
# [,1] [,2] [,3] [,4]
# [1,] 1 2 3 4
# [2,] 1 3 2 4
# [3,] 1 4 2 3
# [4,] 1 5 2 3
# [5,] 1 6 2 3
# [6,] 2 3 1 4
inds[25,,drop=FALSE]
# [,1] [,2] [,3] [,4]
# [1,] 3 4 1 5
We see from this that the 25th row has the "1" and "5" we expect.
Now the finish:
nr <- nrow(inds)
out <- matrix(nrow = nr, ncol = 6L)
out[cbind(1:nr,inds[,1])] <- 0L
out[cbind(1:nr,inds[,2])] <- 0L
out[cbind(1:nr,inds[,3])] <- 1L
out[cbind(1:nr,inds[,4])] <- 1L
head(out)
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 0 0 1 1 NA NA
# [2,] 0 1 0 1 NA NA
# [3,] 0 1 1 0 NA NA
# [4,] 0 1 1 NA 0 NA
# [5,] 0 1 1 NA NA 0
# [6,] 1 0 0 1 NA NA
out[25,,drop=FALSE]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 NA 0 0 1 NA
The "remaining slots" I mentioned above (for the third value) are all NA
, by design.
out[is.na(out)] <- 2L
head(out)
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 0 0 1 1 2 2
# [2,] 0 1 0 1 2 2
# [3,] 0 1 1 0 2 2
# [4,] 0 1 1 2 0 2
# [5,] 0 1 1 2 2 0
# [6,] 1 0 0 1 2 2
out[25,,drop=FALSE]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 2 0 0 1 2
Now a quick sanity check to ensure our out
variable has exactly two of each element on each row.
all(apply(out, 1, table) == 2)
# [1] TRUE