I am trying to run following example code:
import org.apache.spark.mllib.fpm.PrefixSpan
val sequences = sc.parallelize(Seq(
Array(Array(1, 2), Array(3)),
Array(Array(1), Array(3, 2), Array(1, 2)),
Array(Array(1, 2), Array(5)),
Array(Array(6))
), 2).cache()
val prefixSpan = new PrefixSpan()
.setMinSupport(0.5)
.setMaxPatternLength(5)
val model = prefixSpan.run(sequences)
model.freqSequences.collect().foreach { freqSequence =>
println(
freqSequence.sequence.map(_.mkString("[", ", ", "]")).mkString("[", ", ", "]") +
", " + freqSequence.freq
)
}
I need to format model.freqSequences to something similar to following(it is a dataframe with sequence and freq)
|[WrappedArray(2,3)] | 3
|[WrappedArray(1)] | 2
|[WrappedArray(2,1)] | 1