Skip to content

Commit

Permalink
Enable fallback fetch in benchmarks.
Browse files (browse the repository at this point in the history)
Signed-off-by: Pascal Spörri <psp@zurich.ibm.com>
  • Loading branch information
pspoerri committed Sep 13, 2023
1 parent c0e9ef5 commit 081a163
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 4 deletions.
2 changes: 1 addition & 1 deletion examples/config_3.3.2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export S3A_ENDPOINT="http://10.40.0.29:9000"
export S3A_ACCESS_KEY=${S3A_ACCESS_KEY:-$AWS_ACCESS_KEY_ID}
export S3A_SECRET_KEY=${S3A_SECRET_KEY:-$AWS_SECRET_ACCESS_KEY}
export S3A_OUTPUT_BUCKET=${S3A_BUCKET:-"zrlio-tmp"}
export SHUFFLE_DESTINATION=${SHUFFLE_DESTINATION:-"s3a://zrlio-tmp"}
export SHUFFLE_DESTINATION=${SHUFFLE_DESTINATION:-"s3a://zrlio-tmp/"}

# Datasets
## Terasort
Expand Down
33 changes: 30 additions & 3 deletions examples/run_benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,26 @@ do
for ((i = 0 ; i < ${REPEAT} ; i++));
do
export SIZE=$size
export USE_FALLBACK_FETCH=false

export USE_S3_SHUFFLE=0
export USE_NFS_SHUFFLE=0
export USE_NFS_SHUFFLE=0
./terasort/run.sh || true
mc rm -r --force zac/zrlio-tmp

export USE_S3_SHUFFLE=0
export USE_NFS_SHUFFLE=1
./terasort/run.sh || true
mc rm -r --force zac/zrlio-tmp

export USE_S3_SHUFFLE=1
export USE_NFS_SHUFFLE=0
./terasort/run.sh || true
mc rm -r --force zac/zrlio-tmp

# Enable fallback fetch
export USE_FALLBACK_FETCH=true

export USE_S3_SHUFFLE=0
export USE_NFS_SHUFFLE=1
./terasort/run.sh || true
Expand All @@ -59,11 +74,23 @@ SQL_QUERIES=(
for ((i = 0 ; i < ${REPEAT} ; i++));
do
for query in "${SQL_QUERIES[@]}"; do
export USE_FALLBACK_FETCH=false

export USE_S3_SHUFFLE=0
export USE_NFS_SHUFFLE=1
export USE_NFS_SHUFFLE=0
./sql/run_single_query.sh $query || true


export USE_S3_SHUFFLE=0
export USE_NFS_SHUFFLE=1
./sql/run_single_query.sh $query || true

export USE_S3_SHUFFLE=1
export USE_NFS_SHUFFLE=0
./sql/run_single_query.sh $query || true

# Enable fallback fetch.
export USE_FALLBACK_FETCH=true

export USE_S3_SHUFFLE=0
export USE_NFS_SHUFFLE=1
./sql/run_single_query.sh $query || true
Expand Down
9 changes: 9 additions & 0 deletions examples/sql/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ EXECUTOR_MEMORY_OVERHEAD=${EXECUTOR_MEMORY_OVERHEAD:-19000M} # 16G is allocated
INSTANCES=${INSTANCES:-4}

CHECKSUM_ENABLED=${CHECKSUM_ENABLED:-"true"}
USE_FALLBACK_FETCH=${USE_FALLBACK_FETCH:-"false"}

EXTRA_CLASSPATHS='/opt/spark/jars/*'
EXECUTOR_JAVA_OPTIONS="-Dsun.nio.PageAlignDirectMemory=true"
Expand Down Expand Up @@ -54,6 +55,8 @@ SPARK_S3_SHUFFLE_CONFIG=(
--conf spark.shuffle.sort.io.plugin.class=org.apache.spark.shuffle.S3ShuffleDataIO
--conf spark.shuffle.checksum.enabled=${CHECKSUM_ENABLED}
--conf spark.shuffle.s3.rootDir=${SHUFFLE_DESTINATION}
--conf spark.shuffle.s3.useSparkShuffleFetch=${USE_FALLBACK_FETCH}
--conf spark.storage.decommission.fallbackStorage.path=${SHUFFLE_DESTINATION}
)

if (( "$USE_S3_SHUFFLE" == 0 )); then
Expand All @@ -80,6 +83,8 @@ if (( "$USE_NFS_SHUFFLE" == 1 )); then
--conf spark.kubernetes.executor.podTemplateFile=${SCRIPT_DIR}/../templates/executor_nfs.yml
--conf spark.kubernetes.driver.podTemplateFile=${SCRIPT_DIR}/../templates/driver_nfs.yml
--conf spark.hadoop.fs.file.block.size=$((128*1024*1024))
--conf spark.shuffle.s3.useSparkShuffleFetch=${USE_FALLBACK_FETCH}
--conf spark.storage.decommission.fallbackStorage.path=file:///nfs/
)

SPARK_KUBERNETES_TEMPLATES=(
Expand All @@ -88,6 +93,10 @@ if (( "$USE_NFS_SHUFFLE" == 1 )); then
)
fi

# Append "-fallback" to PROCESS_TAG when USE_FALLBACK_FETCH is exactly "true";
# any other value leaves the tag untouched.
# NOTE(review): presumably this keeps fallback-fetch runs distinguishable from
# regular runs — confirm where PROCESS_TAG is consumed.
case "${USE_FALLBACK_FETCH}" in
  true)
    PROCESS_TAG="${PROCESS_TAG}-fallback"
    ;;
esac

USE_PROFILER=${USE_PROFILER:-0}
if (( "${USE_PROFILER}" == 1 )); then
PROFILER_CONFIG="reporter=com.uber.profiling.reporters.InfluxDBOutputReporter,configProvider=com.uber.profiling.YamlConfigProvider,configFile=/profiler_config.yml,metricInterval=5000,sampleInterval=5000,ioProfiling=true"
Expand Down
9 changes: 9 additions & 0 deletions examples/terasort/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ SIZE=${SIZE:-1g}
# Shuffle on S3
USE_S3_SHUFFLE=${USE_S3_SHUFFLE:-1}
CHECKSUM_ENABLED=${CHECKSUM_ENABLED:-"true"}
USE_FALLBACK_FETCH=${USE_FALLBACK_FETCH:-"false"}

EXTRA_CLASSPATHS='/opt/spark/jars/*'
EXECUTOR_JAVA_OPTIONS="-Dsun.nio.PageAlignDirectMemory=true"
Expand Down Expand Up @@ -56,6 +57,8 @@ SPARK_S3_SHUFFLE_CONFIG=(
--conf spark.shuffle.sort.io.plugin.class=org.apache.spark.shuffle.S3ShuffleDataIO
--conf spark.shuffle.checksum.enabled=${CHECKSUM_ENABLED}
--conf spark.shuffle.s3.rootDir=${SHUFFLE_DESTINATION}
--conf spark.shuffle.s3.useSparkShuffleFetch=${USE_FALLBACK_FETCH}
--conf spark.storage.decommission.fallbackStorage.path=${SHUFFLE_DESTINATION}
)

if (( "$USE_S3_SHUFFLE" == 0 )); then
Expand All @@ -80,6 +83,8 @@ if (( "$USE_NFS_SHUFFLE" == 1 )); then
--conf spark.shuffle.checksum.enabled=${CHECKSUM_ENABLED}
--conf spark.shuffle.s3.rootDir=file:///nfs/
--conf spark.hadoop.fs.file.block.size=$((128*1024*1024))
--conf spark.shuffle.s3.useSparkShuffleFetch=${USE_FALLBACK_FETCH}
--conf spark.storage.decommission.fallbackStorage.path=file:///nfs/
)

SPARK_KUBERNETES_TEMPLATES=(
Expand All @@ -88,6 +93,10 @@ if (( "$USE_NFS_SHUFFLE" == 1 )); then
)
fi

# Append "-fallback" to PROCESS_TAG when USE_FALLBACK_FETCH is exactly "true";
# any other value leaves the tag untouched.
# NOTE(review): presumably this keeps fallback-fetch runs distinguishable from
# regular runs — confirm where PROCESS_TAG is consumed.
case "${USE_FALLBACK_FETCH}" in
  true)
    PROCESS_TAG="${PROCESS_TAG}-fallback"
    ;;
esac

USE_PROFILER=${USE_PROFILER:-0}
if (( "${USE_PROFILER}" == 1 )); then
PROFILER_CONFIG="reporter=com.uber.profiling.reporters.InfluxDBOutputReporter,configProvider=com.uber.profiling.YamlConfigProvider,configFile=/profiler_config.yml,metricInterval=5000,sampleInterval=5000,ioProfiling=true"
Expand Down

0 comments on commit 081a163

Please sign in to comment.