Commit
Merge branch 'develop' into artv3/ltimes-benchmark
rhornung67 authored Oct 1, 2024
2 parents 5a8ee06 + 1ddae3d commit 94f89e3
Showing 3 changed files with 226 additions and 3 deletions.
9 changes: 7 additions & 2 deletions benchmark/CMakeLists.txt
@@ -17,6 +17,11 @@ raja_add_benchmark(
   SOURCES benchmark-atomic.cpp)
 endif()
 
+#TODO: Fix build issues
+#raja_add_benchmark(
+# NAME ltimes
+# SOURCES ltimes.cpp)
+
 raja_add_benchmark(
-  NAME ltimes
-  SOURCES ltimes.cpp)
+  NAME raja_view_blur
+  SOURCES raja_view_blur.cpp)
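
For context, a minimal sketch of building and running the new benchmark target is shown below; the RAJA_ENABLE_BENCHMARKS option name, build layout, and executable path are assumptions for illustration, not part of this commit.

  # configure a build with benchmarks enabled (option name assumed)
  mkdir build && cd build
  cmake -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_BENCHMARKS=On ..
  # build and run the target registered by raja_add_benchmark (target and path assumed)
  make raja_view_blur
  ./benchmark/raja_view_blur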
218 changes: 218 additions & 0 deletions benchmark/raja_view_blur.cpp
@@ -0,0 +1,218 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2016-24, Lawrence Livermore National Security, LLC
// and RAJA project contributors. See the RAJA/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include <RAJA/RAJA.hpp>
#include "RAJA/util/Timer.hpp"
#include <iostream>

/*
* RAJA view performance test
* Kernel performs a 2D Gaussian blur
*
*/

//Uncomment to specify variant
//#define RUN_HIP_VARIANT
//#define RUN_CUDA_VARIANT
//#define RUN_SYCL_VARIANT
//#define RUN_OPENMP_VARIANT
#define RUN_SEQ_VARIANT
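//Note: define exactly one RUN_*_VARIANT at a time; the matching policy block
//below is compiled only when the corresponding RAJA_ENABLE_* feature is also
//enabled in the RAJA build.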


using host_pol = RAJA::seq_exec;
using host_resources = RAJA::resources::Host;


#if defined(RAJA_ENABLE_HIP) && defined(RUN_HIP_VARIANT)
using device_pol = RAJA::hip_exec<256>;
using device_resources = RAJA::resources::Hip;

using kernel_pol = RAJA::KernelPolicy<
    RAJA::statement::HipKernelFixed<256,
      RAJA::statement::For<1, RAJA::hip_global_size_y_direct<16>,
        RAJA::statement::For<0, RAJA::hip_global_size_x_direct<16>,
          RAJA::statement::Lambda<0>
        >
      >
    >
  >;
#endif

#if defined(RAJA_ENABLE_CUDA) && defined(RUN_CUDA_VARIANT)
using device_pol = RAJA::cuda_exec<256>;
using device_resources = RAJA::resources::Cuda;

using kernel_pol = RAJA::KernelPolicy<
    RAJA::statement::CudaKernelFixed<256,
      RAJA::statement::For<1, RAJA::cuda_global_size_y_direct<16>,
        RAJA::statement::For<0, RAJA::cuda_global_size_x_direct<16>,
          RAJA::statement::Lambda<0>
        >
      >
    >
  >;
#endif

#if defined(RAJA_ENABLE_SYCL) && defined(RUN_SYCL_VARIANT)
using device_pol = RAJA::sycl_exec<256>;
using device_resources = RAJA::resources::Sycl;

using kernel_pol = RAJA::KernelPolicy<
    RAJA::statement::SyclKernel<
      RAJA::statement::For<1, RAJA::sycl_global_item_1,
        RAJA::statement::For<0, RAJA::sycl_global_item_2,
          RAJA::statement::Lambda<0>
        >
      >
    >
  >;
#endif

#if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP_VARIANT)
using device_pol = RAJA::omp_parallel_for_exec;
using device_resources = RAJA::resources::Host;

using kernel_pol = RAJA::KernelPolicy<
    RAJA::statement::For<1, RAJA::omp_parallel_for_exec,
      RAJA::statement::For<0, RAJA::seq_exec,
        RAJA::statement::Lambda<0>
      >
    >
  >;
#endif

#if defined(RUN_SEQ_VARIANT)
using device_pol = RAJA::seq_exec;
using device_resources = RAJA::resources::Host;

using kernel_pol = RAJA::KernelPolicy<
    RAJA::statement::For<1, RAJA::seq_exec,
      RAJA::statement::For<0, RAJA::seq_exec,
        RAJA::statement::Lambda<0>
      >
    >
  >;
#endif
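
//In the kernel policies above, statement::For<1, ...> iterates the second
//range in the tuple passed to RAJA::kernel (range_j below) and
//statement::For<0, ...> the first (range_i); Lambda<0> receives the loop
//indices in tuple order, i.e. (i, j).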

int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
{

const int N = 10000;
const int K = 17;

device_resources def_device_res{device_resources::get_default()};
host_resources def_host_res{host_resources::get_default()};

auto timer = RAJA::Timer();

//launch to initialize the stream
RAJA::forall<device_pol>
  (RAJA::RangeSegment(0,1), [=] RAJA_HOST_DEVICE (int i) {
});

int * array = def_host_res.allocate<int>(N * N);
int * array_copy = def_host_res.allocate<int>(N * N);

//big array, or image
for (int i = 0; i < N * N; ++i) {
array[i] = 1;
array_copy[i] = 1;
}

//small array that acts as the blur
int * kernel = def_host_res.allocate<int>(K * K);
for (int i = 0; i < K * K; ++i) {
kernel[i] = 2;
}

// copying to gpu
int* d_array = def_device_res.allocate<int>(N * N);
int* d_array_copy = def_device_res.allocate<int>(N * N);
int* d_kernel = def_device_res.allocate<int>(K * K);

def_device_res.memcpy(d_array, array, N * N * sizeof(int));
def_device_res.memcpy(d_array_copy, array_copy, N * N * sizeof(int));
def_device_res.memcpy(d_kernel, kernel, K * K * sizeof(int));

constexpr int DIM = 2;
RAJA::View<int, RAJA::Layout<DIM, int, 1>> array_view(d_array, N, N);
RAJA::View<int, RAJA::Layout<DIM, int, 1>> array_view_copy(d_array_copy, N, N);
RAJA::View<int, RAJA::Layout<DIM, int, 1>> kernel_view(d_kernel, K, K);
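
//The Layout<DIM, int, 1> used above is a 2D layout with unit stride in the
//last (column) index, so array_view(i, j) addresses the same element as
//d_array[i * N + j] in the raw-pointer variant further below.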

RAJA::RangeSegment range_i(0, N);
RAJA::RangeSegment range_j(0, N);

timer.start();

RAJA::kernel<kernel_pol>
  (RAJA::make_tuple(range_i, range_j),
   [=] RAJA_HOST_DEVICE (int i, int j) {

     int sum = 0;

     //looping through the "blur"
     for (int m = 0; m < K; ++m) {
       for (int n = 0; n < K; ++n) {
         int x = i + m;
         int y = j + n;

         //add the "blur" contribution wherever it overlaps the "image"
         if (x < N && y < N) {
           sum += kernel_view(m, n) * array_view(x, y);
         }
       }
     }

     array_view(i, j) += sum;
   }
  );

timer.stop();

std::cout<<"Elapsed time with RAJA view : "<<timer.elapsed()<<std::endl;


timer.reset();
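//Repeat the same blur using raw pointer indexing instead of RAJA::View
//objects, so the two timings compare the overhead of View indexing.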
timer.start();

RAJA::kernel<kernel_pol>
  (RAJA::make_tuple(range_i, range_j),
   [=] RAJA_HOST_DEVICE (int i, int j) {

     int sum = 0;

     //looping through the "blur"
     for (int m = 0; m < K; ++m) {
       for (int n = 0; n < K; ++n) {
         int x = i + m;
         int y = j + n;

         //add the "blur" contribution wherever it overlaps the "image"
         if (x < N && y < N) {
           sum += d_kernel[m * K + n] * d_array_copy[x * N + y];
         }
       }
     }

     d_array_copy[i * N + j] += sum;
   }
  );
timer.stop();

std::cout<<"Elapsed time with NO RAJA view : "<<timer.elapsed()<<std::endl;

def_device_res.memcpy(array, d_array, N * N * sizeof(int));
def_device_res.memcpy(array_copy, d_array_copy, N * N * sizeof(int));

def_device_res.deallocate(d_array);
def_device_res.deallocate(d_array_copy);
def_device_res.deallocate(d_kernel);

def_host_res.deallocate(array);
def_host_res.deallocate(array_copy);
def_host_res.deallocate(kernel);

return 0;
}
2 changes: 1 addition & 1 deletion scripts/lc-builds/corona_sycl.sh
@@ -38,7 +38,7 @@ DATE=$(printf '%(%Y-%m-%d)T\n' -1)
 export PATH=${SYCL_PATH}/bin:$PATH
 export LD_LIBRARY_PATH=${SYCL_PATH}/lib:${SYCL_PATH}/lib64:$LD_LIBRARY_PATH
 
-module load cmake/3.24.2
+module load cmake/3.23.1
 
 cmake \
   -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
