Move files using NIO if the shuffle dir is mounted as a file system.
Signed-off-by: Pascal Spörri <psp@zurich.ibm.com>
pspoerri committed Sep 22, 2023
1 parent 0b56e2b commit 1594c8d
Showing 2 changed files with 31 additions and 5 deletions.
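
The change in a nutshell: when the configured shuffle root is a locally mounted file system (a file:// URI), the spill file can be renamed into place with java.nio instead of being re-read and streamed out. Below is a minimal sketch of that pattern under stated assumptions; persistSpill and openRemoteStream are hypothetical stand-ins for the dispatcher plumbing shown in the actual diff that follows.

import java.io.{File, FileInputStream, OutputStream}
import java.net.URI
import java.nio.file.{Files, Path, StandardCopyOption}

object SpillPersistSketch {
  // openRemoteStream stands in for dispatcher.createBlock(block); it is
  // not part of the commit, just a placeholder for the remote writer.
  def persistSpill(spillFile: File,
                   rootDir: URI,
                   localDest: Path,
                   openRemoteStream: () => OutputStream): Unit = {
    if (rootDir.getScheme == "file") {
      // Mounted file system: Files.move degenerates to a metadata-only
      // rename when source and destination sit on the same device.
      Files.createDirectories(localDest.getParent)
      Files.move(spillFile.toPath, localDest, StandardCopyOption.REPLACE_EXISTING)
    } else {
      // Remote root (e.g. s3a://): fall back to streaming the bytes.
      val in = new FileInputStream(spillFile)
      val out = openRemoteStream()
      try in.transferTo(out)
      finally {
        in.close()
        out.close()
      }
    }
  }
}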
S3SingleSpillShuffleMapOutputWriter.scala
@@ -5,14 +5,17 @@
 
 package org.apache.spark.shuffle
 
+import org.apache.spark.TaskContext
+import org.apache.spark.internal.Logging
 import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter
 import org.apache.spark.shuffle.helper.{S3ShuffleDispatcher, S3ShuffleHelper}
 import org.apache.spark.storage.ShuffleDataBlockId
 import org.apache.spark.util.Utils
 
 import java.io.{File, FileInputStream}
+import java.nio.file.{Files, Path}
 
-class S3SingleSpillShuffleMapOutputWriter(shuffleId: Int, mapId: Long) extends SingleSpillShuffleMapOutputWriter {
+class S3SingleSpillShuffleMapOutputWriter(shuffleId: Int, mapId: Long) extends SingleSpillShuffleMapOutputWriter with Logging {
 
   private lazy val dispatcher = S3ShuffleDispatcher.get
 
@@ -21,12 +24,34 @@
       partitionLengths: Array[Long],
       checksums: Array[Long]
   ): Unit = {
-    val in = new FileInputStream(mapSpillFile)
     val block = ShuffleDataBlockId(shuffleId, mapId, IndexShuffleBlockResolver.NOOP_REDUCE_ID)
-    val out = new S3MeasureOutputStream(dispatcher.createBlock(block), block.name)
 
-    // Note: HDFS does not exposed a nio-buffered write interface.
-    Utils.copyStream(in, out, closeStreams = true)
+    if (dispatcher.rootIsLocal) {
+      // Use NIO to move the file if the folder is local.
+      val now = System.nanoTime()
+      val path = dispatcher.getPath(block)
+      val fileDestination = path.toUri.getRawPath
+      val dir = path.getParent
+      if (!dispatcher.fs.exists(dir)) {
+        dispatcher.fs.mkdirs(dir)
+      }
+      Files.move(mapSpillFile.toPath, Path.of(fileDestination))
+      val timings = System.nanoTime() - now
+
+      val bytes = partitionLengths.sum
+      val tc = TaskContext.get()
+      val sId = tc.stageId()
+      val sAt = tc.stageAttemptNumber()
+      val t = timings / 1000000
+      val bw = bytes.toDouble / (t.toDouble / 1000) / (1024 * 1024)
+      logInfo(s"Statistics: Stage ${sId}.${sAt} TID ${tc.taskAttemptId()} -- " +
+              s"Writing ${block.name} ${bytes} took ${t} ms (${bw} MiB/s)")
+    } else {
+      // Copy using a stream.
+      val in = new FileInputStream(mapSpillFile)
+      val out = new S3MeasureOutputStream(dispatcher.createBlock(block), block.name)
+      Utils.copyStream(in, out, closeStreams = true)
+    }
 
     if (dispatcher.checksumEnabled) {
       S3ShuffleHelper.writeChecksum(shuffleId, mapId, checksums)
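
As a sanity check on the statistics log line in the hunk above: timings is nanoseconds, t is milliseconds, and bw is bytes per second scaled to MiB. With made-up numbers (not from the commit):

val bytes = 268435456L  // 256 MiB moved (hypothetical)
val t = 250L            // elapsed ms, i.e. timings / 1000000
val bw = bytes.toDouble / (t.toDouble / 1000) / (1024 * 1024)
// 256 MiB in 0.25 s -> bw == 1024.0 MiB/s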
S3ShuffleDispatcher.scala
@@ -46,6 +46,7 @@ class S3ShuffleDispatcher extends Logging {
   }
   private val rootDir_ = if (useSparkShuffleFetch) fallbackStoragePath else conf.get("spark.shuffle.s3.rootDir", defaultValue = "sparkS3shuffle/")
   val rootDir: String = if (rootDir_.endsWith("/")) rootDir_ else rootDir_ + "/"
+  val rootIsLocal: Boolean = URI.create(rootDir).getScheme == "file"
 
   // Optional
   val bufferSize: Int = conf.getInt("spark.shuffle.s3.bufferSize", defaultValue = 8 * 1024 * 1024)
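
The new rootIsLocal flag keys off the URI scheme of the normalized root directory only. Illustrative values in a Scala REPL (the file and s3a paths are hypothetical; the default "sparkS3shuffle/" is relative and has no scheme):

import java.net.URI

URI.create("file:///mnt/nvme/shuffle/").getScheme == "file" // true  -> NIO move path
URI.create("s3a://bucket/sparkS3shuffle/").getScheme        // "s3a" -> stream copy path
URI.create("sparkS3shuffle/").getScheme                     // null  -> stream copy path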
