|
1 | 1 | package is.hail.linalg |
2 | 2 |
|
3 | | -import is.hail._ |
4 | 3 | import is.hail.annotations._ |
5 | 4 | import is.hail.backend.{BroadcastValue, ExecuteContext, HailStateManager} |
6 | 5 | import is.hail.backend.spark.{SparkBackend, SparkTaskContext} |
@@ -28,7 +27,6 @@ import breeze.numerics.{abs => breezeAbs, log => breezeLog, pow => breezePow, sq |
28 | 27 | import breeze.stats.distributions.RandBasis |
29 | 28 | import org.apache.commons.lang3.StringUtils |
30 | 29 | import org.apache.spark._ |
31 | | -import org.apache.spark.executor.InputMetrics |
32 | 30 | import org.apache.spark.mllib.linalg.distributed.{GridPartitioner => _, _} |
33 | 31 | import org.apache.spark.rdd.RDD |
34 | 32 | import org.apache.spark.storage.StorageLevel |
@@ -216,16 +214,27 @@ object BlockMatrix { |
216 | 214 | readMetadata(ctx.fs, uri) |
217 | 215 |
|
218 | 216 | val gp = GridPartitioner(blockSize, nRows, nCols, maybeFiltered) |
| 217 | + val nPartitions = partFiles.length |
| 218 | + val fsBc = ctx.fsBc |
219 | 219 |
|
220 | | - def readBlock(pi: Int, is: InputStream, metrics: InputMetrics) |
221 | | - : Iterator[((Int, Int), BDM[Double])] = { |
222 | | - val block = RichDenseMatrixDouble.read(is, bufferSpec) |
223 | | - is.close() |
| 220 | + val blocks = |
| 221 | + new RDD[((Int, Int), BDM[Double])](ctx.backend.asSpark.sc, Nil) { |
224 | 222 |
|
225 | | - Iterator.single(gp.partCoordinates(pi) -> block) |
226 | | - } |
| 223 | + case class FilePartition(index: Int, file: String) extends Partition |
| 224 | + |
| 225 | + override lazy val getPartitions: Array[Partition] = |
| 226 | + Array.tabulate(nPartitions)(i => FilePartition(i, partFiles(i))) |
227 | 227 |
|
228 | | - val blocks = HailContext.readPartitions(ctx, uri, partFiles, readBlock, Some(gp)) |
| 228 | + override def compute(split: Partition, context: TaskContext) |
| 229 | + : Iterator[((Int, Int), BDM[Double])] = |
| 230 | + using(fsBc.value.open(uri + "/parts/" + split.asInstanceOf[FilePartition].file)) { in => |
| 231 | + val block = RichDenseMatrixDouble.read(in, bufferSpec) |
| 232 | + Iterator.single(gp.partCoordinates(split.index) -> block) |
| 233 | + } |
| 234 | + |
| 235 | + @transient override val partitioner: Option[Partitioner] = |
| 236 | + Some(gp) |
| 237 | + } |
229 | 238 |
|
230 | 239 | new BlockMatrix(blocks, blockSize, nRows, nCols) |
231 | 240 | } |
|
0 commit comments