Backgroud: I am trying to recursively select rows and check for conditions based on combinations. I am able to dump the combinations using iterpc function. However, when I repeat the for loop on bigger row datasets (of input file) >200, I get the error - "Cannot allocate vector of size n GB".
CODE
DATA file with PARA to PARL (presented parameters, mostly numeric) data1 <- file.choose(read.csv(), stringasFactors =FALSE)
#### Combination prediction using iterpc and a loop
to check condition success of subset rows #####
require(iterpc)
getComboChunks <- function(n, k, chunkSize, totalCombos, myFile, myTestFile) {
myIter <- iterpc(n, k)
## initialized myFile
myCombs <- getnext(myIter, chunkSize)
write.table(myCombs, file = myFile, sep = ",", col.names = FALSE)
maxIteration <- (totalCombos - chunkSize) %/% chunkSize
for (i in 1:maxIteration) {
## get the next "chunkSize" of combinations
myCombs <- getnext(myIter, chunkSize)
## append the above combinations to your file
write.table(myCombs, file = myFile, sep = ",",
col.names = FALSE , append = TRUE)
o <- 1
namee <- subset(data1, SNO %in% myCombs)
a <- sum(namee$Weight)
h <- (sum(namee$PARA*namee$Weight))/a
f <- (sum(namee$PARB*namee$Weight))/a
g <- (sum(namee$PARC*namee$Weight))/a
l <- (sum(namee$PARE*namee$Weight))/a
m <- (sum(namee$PARF*namee$Weight))/a
n <- (sum(namee$PARD*namee$Weight))/a
p <- (sum(namee$PARG*namee$Weight))/a
q <- (sum(namee$ParH*namee$Weight))/a
r <- (sum(namee$PARI))
pr <- (sum(namee$pr))
le <- (sum(namee$PARJ*namee$Weight))/a
PM[is.na(PM)] <- 0
k <- ifelse(aska <= a && askle <= le
&& askh <= h && askf <= f
&& askg <= g && askl <= l && askm <= m
&& askn <= n && askp <= p && askq <= q
&& askr <= r && pr >=askpr && a <aska2
&& le < askle2 && g <askg2 && f <askf2
&& h <askh2&& l <askl2 && m <askm2
&& n <askn2 && p <askp2 && q <askq2
&& r <askr2, "Success","Failure")
if (k == "Success")
{
PM$SNO <- as.character(PM$SNO)
Masterlist$SNO <- as.character(Masterlist$SNO)
PM[is.na(PM)] <- 0
List <- rbind(List, as.vector(c(i,a,h,f,g,l,m,n,p,q,le,r,k)))
print("Success")
}
if (k == "Failure"){
print("Failure")
print(i)
}
rm(namee)
o <- o+1
gc()
}
myTests <- List
## append the above combinations to your file
write.table(myTests, file = myTestFile, sep = ",",
col.names = FALSE , append = TRUE)
}
A dataset with 200 rows
getComboChunks(200, 5, 1, 2535650040, "myCombos1.csv", "myTests.csv")
Solution To recursively check for the conditions by sequentially flushing memory to work on larger datasets (200 or more rows). I believe I am getting the error "cannot allocate vector of size n GB" because of the following line
List <- rbind(List, as.vector(c(i,a,h,f,g,l,m,n,p,q,le,r,k)))
Can this be eliminated by preallocating a huge vector and dumping the values instead of rbind function. Additionally, can the memory be flushed after every run?