The files condition.txt and gene.txt both contain 8,000,000 rows, but the number of columns differs from row to row. The following code has been running for two weeks and still has not completed. How can I rewrite it to use parallel computing in R or in Python? For an introduction to the problem, see "R code runs too slow, how to rewrite this code". Thank you.
library(compiler)
library(Matrix)
# Enable the highest byte-code JIT level offered by the 'compiler' package.
enableJIT(3)
i <- 0

# Read every line of each input file into a character vector. Each
# connection is closed as soon as its file has been read (even if
# readLines() fails), so no file handles are leaked.
con <- file("condition.txt", "r")
x1 <- tryCatch(readLines(con, n = -1), finally = close(con))

con2 <- file("gene.txt", "r")
x2 <- tryCatch(readLines(con2, n = -1), finally = close(con2))
#' Convert a list of token vectors into a sparse row-by-token matrix
#'
#' Row `k` of the result corresponds to `s[[k]]`; there is one column per
#' distinct token, in order of first appearance across the whole list.
#' Every (row, token) occurrence contributes the value 1.
#'
#' @param s A list of vectors (e.g. the result of `strsplit()`); elements
#'   may be empty, and the list itself may be empty.
#' @return A sparse matrix built with `Matrix::spMatrix()` having
#'   `length(s)` rows and one column per unique token.
str2mat <- function(s) {
  n <- length(s)
  # lengths() always returns an integer vector, including for an empty
  # list, where sapply(s, length) would return list() and break rep().
  ni <- lengths(s, use.names = FALSE)
  s <- unlist(s, use.names = FALSE)
  u <- unique(s)
  # NOTE(review): a token repeated inside one element yields a repeated
  # (i, j) triplet; triplet-form matrices presumably sum these, giving a
  # value > 1 -- confirm this is intended.
  spMatrix(
    nrow = n,
    ncol = length(u),
    # seq_len(n) is empty when n == 0, whereas 1L:n would be c(1L, 0L).
    i = rep(seq_len(n), ni),
    j = match(s, u),
    x = rep(1, length(s))
  )
}
# Build one sparse matrix per input file: one row per input line, one
# column per distinct "|"-separated token.
m1 <- str2mat(strsplit(x1, "|", fixed = TRUE))
m2 <- str2mat(strsplit(x2, "|", fixed = TRUE))

# Per-row totals of each matrix.
n1 <- rowSums(m1)
n2 <- rowSums(m2)

# num[a, b] is the product of the row-a/row-b cross-products of m1 and m2;
# it stays sparse because it is non-zero only where both factors are.
num <- tcrossprod(m1) * tcrossprod(m2)
n12 <- n1 * n2

# Only pairs a < b are wanted, so take the stored entries of the strict
# upper triangle as (i, j, x) triplets.
pairs <- summary(triu(num, k = 1L))

# The ratio num / pmin(n12[a], n12[b]) can exceed 0.6 only where num is
# non-zero, so it is evaluated on the stored entries alone. A dense
# outer(n12, n12, pmin) would need length(n12)^2 cells, which is not
# feasible for millions of rows.
ratio <- pairs$x / pmin(n12[pairs$i], n12[pairs$j])
# which() drops NA/NaN results (e.g. from a 0 / 0) along with FALSE ones.
hit <- which(ratio > 0.6)

# Sparse logical matrix flagging the selected pairs (upper triangle only).
use <- sparseMatrix(
  i = pairs$i[hit],
  j = pairs$j[hit],
  x = rep(TRUE, length(hit)),
  dims = dim(num)
)

# Two-column matrix of (row, col) indices of the selected pairs.
out <- cbind(row = pairs$i[hit], col = pairs$j[hit])
The result of the Rprof analysis with a smaller input file (20 rows) is as follows:
$by.self
self.time self.pct total.time total.pct
"options" 0.10 17.86 0.10 17.86
"findCenvVar" 0.08 14.29 0.14 25.00
"unique" 0.06 10.71 0.12 21.43
"unlist" 0.06 10.71 0.12 21.43
"$" 0.06 10.71 0.06 10.71
"<Anonymous>" 0.02 3.57 0.52 92.86
"tryCatchOne" 0.02 3.57 0.46 82.14
"FUN" 0.02 3.57 0.12 21.43
"findVar" 0.02 3.57 0.04 7.14
"%in%" 0.02 3.57 0.02 3.57
".simpleInheritanceGeneric" 0.02 3.57 0.02 3.57
"as.list" 0.02 3.57 0.02 3.57
"get" 0.02 3.57 0.02 3.57
"getClassDef" 0.02 3.57 0.02 3.57
"parent.env" 0.02 3.57 0.02 3.57
$by.total
total.time total.pct self.time self.pct
"<Anonymous>" 0.52 92.86 0.02 3.57
"test" 0.50 89.29 0.00 0.00
"tryCatchOne" 0.46 82.14 0.02 3.57
"cmpfun" 0.46 82.14 0.00 0.00
"doTryCatch" 0.46 82.14 0.00 0.00
"tryCatch" 0.46 82.14 0.00 0.00
"tryCatchList" 0.46 82.14 0.00 0.00
"standardGeneric" 0.42 75.00 0.00 0.00
"cmp" 0.32 57.14 0.00 0.00
"cmpCall" 0.32 57.14 0.00 0.00
"genCode" 0.32 57.14 0.00 0.00
"h" 0.28 50.00 0.00 0.00
"tryInline" 0.28 50.00 0.00 0.00
"initialize" 0.26 46.43 0.00 0.00
"new" 0.26 46.43 0.00 0.00
"spMatrix" 0.24 42.86 0.00 0.00
"str2mat" 0.24 42.86 0.00 0.00
"cmpCallArgs" 0.16 28.57 0.00 0.00
"cmpCallSymFun" 0.16 28.57 0.00 0.00
"findCenvVar" 0.14 25.00 0.08 14.29
"findLocalsList" 0.14 25.00 0.00 0.00
"funEnv" 0.14 25.00 0.00 0.00
"make.functionContext" 0.14 25.00 0.00 0.00
"unique" 0.12 21.43 0.06 10.71
"unlist" 0.12 21.43 0.06 10.71
"FUN" 0.12 21.43 0.02 3.57
"/" 0.12 21.43 0.00 0.00
"as" 0.12 21.43 0.00 0.00
"callGeneric" 0.12 21.43 0.00 0.00
"checkCall" 0.12 21.43 0.00 0.00
"eval" 0.12 21.43 0.00 0.00
"findLocalsList1" 0.12 21.43 0.00 0.00
"options" 0.10 17.86 0.10 17.86
"cmpForBody" 0.10 17.86 0.00 0.00
"lapply" 0.10 17.86 0.00 0.00
".findInheritedMethods" 0.08 14.29 0.00 0.00
"asMethod" 0.08 14.29 0.00 0.00
"isSymmetric" 0.08 14.29 0.00 0.00
"$" 0.06 10.71 0.06 10.71
"cmpBuiltinArgs" 0.06 10.71 0.00 0.00
"cmpSym" 0.06 10.71 0.00 0.00
"getInlineInfo" 0.06 10.71 0.00 0.00
"findVar" 0.04 7.14 0.02 3.57
".asCoerceMethod" 0.04 7.14 0.00 0.00
".local" 0.04 7.14 0.00 0.00
"all.equal" 0.04 7.14 0.00 0.00
"anyStrings" 0.04 7.14 0.00 0.00
"cmpPrim1" 0.04 7.14 0.00 0.00
"findFunDef" 0.04 7.14 0.00 0.00
"forceSymmetric" 0.04 7.14 0.00 0.00
"isTRUE" 0.04 7.14 0.00 0.00
"validityMethod" 0.04 7.14 0.00 0.00
"validObject" 0.04 7.14 0.00 0.00
"%in%" 0.02 3.57 0.02 3.57
".simpleInheritanceGeneric" 0.02 3.57 0.02 3.57
"as.list" 0.02 3.57 0.02 3.57
"get" 0.02 3.57 0.02 3.57
"getClassDef" 0.02 3.57 0.02 3.57
"parent.env" 0.02 3.57 0.02 3.57
"*" 0.02 3.57 0.00 0.00
".a.e.comb" 0.02 3.57 0.00 0.00
".eligibleSuperClasses" 0.02 3.57 0.00 0.00
">" 0.02 3.57 0.00 0.00
"all.equal_num" 0.02 3.57 0.00 0.00
"as.vector" 0.02 3.57 0.00 0.00
"checkSkipLoopCntxt" 0.02 3.57 0.00 0.00
"checkSkipLoopCntxtList" 0.02 3.57 0.00 0.00
"cmpBuiltin" 0.02 3.57 0.00 0.00
"cmpCallExprFun" 0.02 3.57 0.00 0.00
"cmpComplexAssign" 0.02 3.57 0.00 0.00
"cmpPrim2" 0.02 3.57 0.00 0.00
"cmpSetterCall" 0.02 3.57 0.00 0.00
"cmpSetterDispatch" 0.02 3.57 0.00 0.00
"cmpSymbolAssign" 0.02 3.57 0.00 0.00
"extends" 0.02 3.57 0.00 0.00
"Filter" 0.02 3.57 0.00 0.00
"findLocVar" 0.02 3.57 0.00 0.00
"is" 0.02 3.57 0.00 0.00
"isBaseVar" 0.02 3.57 0.00 0.00
"isLoopTopFun" 0.02 3.57 0.00 0.00
"match.fun" 0.02 3.57 0.00 0.00
"Matrix" 0.02 3.57 0.00 0.00
"outer" 0.02 3.57 0.00 0.00
"rowSums" 0.02 3.57 0.00 0.00
"sapply" 0.02 3.57 0.00 0.00
"try" 0.02 3.57 0.00 0.00
"trySetterInline" 0.02 3.57 0.00 0.00
"which" 0.02 3.57 0.00 0.00
$sample.interval
[1] 0.02
$sampling.time
[1] 0.56