Skip to content

Commit

Permalink
Merge pull request #1408 from Imraj-Singh/cuda_stir
Browse files Browse the repository at this point in the history
Add CudaRelativeDifferencePrior (currently limited to 3x3x3 neighbourhood)
  • Loading branch information
KrisThielemans authored Jul 8, 2024
2 parents 32befa6 + 1f0d692 commit b153fda
Show file tree
Hide file tree
Showing 27 changed files with 1,038 additions and 129 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ repos:
rev: '62302476'
hooks:
- id: clang-format
files: \.(c|cc|cxx|cpp|h|hpp|hxx|inl|txx)$
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|inl|txx)$
32 changes: 32 additions & 0 deletions CMakeLists.txt
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,38 @@ else()
message(STATUS "NiftyPET projector support disabled.")
endif()

if(NOT DISABLE_STIR_CUDA)
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
find_package(CUDAToolkit QUIET)
if (NOT CUDAToolkit_FOUND)
message(WARNING "CUDAToolkit not detected. Setting DISABLE_STIR_CUDA to ON.")
set(DISABLE_STIR_CUDA ON)
set(STIR_WITH_CUDA OFF)
else()
if("${CMAKE_VERSION}" VERSION_LESS "3.23")
message(FATAL_ERROR "CMake 3.23 or newer is required to use CMAKE_CUDA_ARCHITECTURES set to 'all'. Upgrade your CMake or set DISABLE_STIR_CUDA to ON.")
else()
set(CMAKE_CUDA_ARCHITECTURES "all")
endif()
message(STATUS "STIR CUDA support enabled. Using CUDA version ${CUDAToolkit_VERSION}.")
set(STIR_WITH_CUDA ON)
endif()
# check if run-time is available for ctests
find_program(NVIDIA_SMI nvidia-smi)
if (NOT NVIDIA_SMI)
message(WARNING "nvidia-smi not found. Disabling the ctests using CUDA")
set(SKIP_CUDA_TESTS ON)
endif()
else()
message(WARNING "No CUDA compiler found. Setting DISABLE_STIR_CUDA to ON.")
set(DISABLE_STIR_CUDA ON)
set(STIR_WITH_CUDA OFF)
endif()
endif()

# Parallelproj
if(NOT DISABLE_Parallelproj_PROJECTOR)
find_package(parallelproj 1.3.4 CONFIG)
Expand Down
16 changes: 14 additions & 2 deletions documentation/release_6.2.htm
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ <h2>Overall summary</h2>
</p>

<p>
This release contains mainly code written by Nicole Jurjew (UCL) and Kris Thielemans (UCL).
This release contains mainly code written by Nicole Jurjew (UCL) (SSRB for TOF),
Imraj Singh (UCL) (CUDA version of the Relative Difference Prior) and Kris Thielemans (UCL).
</p>

<h2>Patch release info</h2>
Expand Down Expand Up @@ -51,6 +52,13 @@ <h3>New functionality</h3>
in <code>ProjData</code>).<br>
<a href=https://github.com/UCL/STIR/pull/1439>PR #1439</a> and <a href=https://github.com/UCL/STIR/pull/1448>PR #1448</a>
</li>
<li>
New prior <code>CudaRelativeDifferencePrior</code> (use <tt>Cuda Relative Difference Prior</tt> in <tt>.par</tt> files), only
available if the CUDA toolkit is found during building. Results are identical to <code>RelativeDifferencePrior</code>
up to numerical rounding issues. However, the code is currently limited to 3x3x3 weights.<br>
Added timings for the RDP (both non-CUDA and CUDA) tothe <tt>stir_timings</tt> utility.<br>
<a href=https://github.com/UCL/STIR/pull/1408>PR #1408</a>
</li>
</ul>


Expand Down Expand Up @@ -111,7 +119,7 @@ <h3>Known problems</h3>
<p>See <a href=https://github.com/UCL/STIR/labels/bug>our issue tracker</a>.</p>


<H2>What's new for developers (aside from what should be obvious
<H2>What is new for developers (aside from what should be obvious
from the above):</H2>

<h3>Changed functionality</h3>
Expand Down Expand Up @@ -160,6 +168,10 @@ <h3>Build system</h3>
Force C++ version according to CERN ROOT versions: ROOT 6.28.10 needs C++17 and 6.30.2 needs C++20.
Also some fixes when relying on <code>root-config</code>.
</li>
<li>
Optionally enable CUDA as a CMake language (for the CUDA RDP). <strong>You should use CMake 3.23 or later</strong>
if you use CUDA.
</li>
</ul>

<h3>Test changes</h3>
Expand Down
46 changes: 46 additions & 0 deletions recon_test_pack/CUDA/OSMAPOSL_test_sim_Parallelproj_CUDARDP.par
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
OSMAPOSLParameters :=
; test file for OSMAPOSL with a quadratic prior (and ray tracing projection matrix)
objective function type:= PoissonLogLikelihoodWithLinearModelForMeanAndProjData
PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=

input file := my_prompts${suffix}.hs
zero end planes of segment 0:= 0 ; segment 0 is has direct and indirect planes
; if disabled, defaults to maximum segment number in the file
;maximum absolute segment number to process := 3

projector pair type := parallelproj
Projector Pair Using Parallelproj Parameters:=
End Projector Pair Using Parallelproj Parameters:=

Bin Normalisation type := from projdata
Bin Normalisation From ProjData :=
normalisation projdata filename:= my_acfs${suffix}.hs
End Bin Normalisation From ProjData:=

additive sinogram := my_additive_sinogram${suffix}.hs

xy output image size (in pixels) := 91
zoom := .5

use subset sensitivities:=1
subset sensitivity filenames:= my_test_sim${suffix}_sens_PP_s${num_subsets}_%d.hv
recompute_sensitivity:=1

prior type := Relative Difference Prior
Relative Difference Prior Parameters:=
penalisation factor := 0.5
; next defaults to 0, set to 1 for 2D inverse Euclidean weights, 0 for 3D
only 2D:= 0
END Relative Difference Prior Parameters:=

end PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=

output filename prefix := my_test_sim${suffix}_image_OSMAPOSL_PP_CUDARDP
number of subsets:= ${num_subsets}
start at subset:= 0
number of subiterations:= 28
save estimates at subiteration intervals:= ${num_subsets}

;report objective function values interval := 1

END :=
32 changes: 22 additions & 10 deletions recon_test_pack/run_test_simulate_and_recon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# Copyright (C) 2011 - 2011-01-14, Hammersmith Imanet Ltd
# Copyright (C) 2011-07-01 - 2011, Kris Thielemans
# Copyright (C) 2014, 2022 University College London
# Copyright (C) 2014, 2022, 2024 University College London
# This file is part of STIR.
#
# SPDX-License-Identifier: Apache-2.0
Expand All @@ -13,13 +13,7 @@
# Author Kris Thielemans
#

# Scripts should exit with error code when a test fails:
if [ -n "$TRAVIS" -o -n "$GITHUB_WORKSPACE" ]; then
# The code runs inside Travis or GHA
set -e
fi

echo This script should work with STIR version 6.0. If you have
echo This script should work with STIR version 6.2. If you have
echo a later version, you might have to update your test pack.
echo Please check the web site.
echo
Expand Down Expand Up @@ -62,6 +56,8 @@ if [ $# -eq 1 ]; then
fi

echo "Using `command -v OSMAPOSL`"
echo "Using `command -v OSSPS`"
echo "Using `command -v FBP2D`"

# first need to set this to the C locale, as this is what the STIR utilities use
# otherwise, awk might interpret floating point numbers incorrectly
Expand Down Expand Up @@ -102,7 +98,23 @@ input_ROI_mean=`awk 'NR>2 {print $2}' ${input_image}.roistats`
# and reuses its subset sensitivities
for recon in FBP2D FBP3DRP OSMAPOSL OSSPS; do
echo "========== Testing `command -v ${recon}`"
for parfile in ${recon}_test_sim*.par; do
# Check if we have CUDA code and parallelproj.
# If so, check for test files in CUDA/*
if stir_list_registries |grep -i cuda > /dev/null
then
if stir_list_registries |grep -i parallelproj > /dev/null
then
extra_par_files=`ls CUDA/${recon}_test_sim*.par 2> /dev/null`
if [ -n "$TRAVIS" -o -n "$GITHUB_WORKSPACE" ]; then
# The code runs inside Travis or GHA
if [ -n "$extra_par_files" ]; then
echo "Not running ${extra_par_files} due to no CUDA run-time"
extra_par_files=""
fi
fi
fi
fi
for parfile in ${recon}_test_sim*.par ${extra_par_files}; do
for dataSuffix in "" "$TOF_suffix"; do
echo "===== data suffix: \"$dataSuffix\""
# test first if analytic reconstruction and if so, run pre-correction
Expand Down Expand Up @@ -136,7 +148,7 @@ for recon in FBP2D FBP3DRP OSMAPOSL OSSPS; do

# run actual reconstruction
echo "Running ${recon} ${parfile}"
logfile="my_${parfile}${suffix}.log"
logfile="my_`basename ${parfile}`${suffix}.log"
${MPIRUN} ${recon} ${parfile} > "$logfile" 2>&1
if [ $? -ne 0 ]; then
echo "Error running reconstruction. CHECK RECONSTRUCTION LOG \"$logfile\""
Expand Down
2 changes: 1 addition & 1 deletion src/buildblock/SeparableGaussianArrayFilter.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ SeparableGaussianArrayFilter<num_dimensions, elemT>::construct_filter(bool norma
{
std::stringstream ss;
ss << "Gaussian filter dim[" << i << "] =" << filter_coefficients;
info(ss.str(), 2);
info(ss.str(), 3);
}

this->all_1d_array_filters[i - 1].reset(new ArrayFilter1DUsingConvolution<float>(filter_coefficients));
Expand Down
6 changes: 6 additions & 0 deletions src/cmake/STIRConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ if(@STIR_WITH_NiftyPET_PROJECTOR@)
set(STIR_WITH_NiftyPET_PROJECTOR TRUE)
endif()

if(@STIR_WITH_CUDA@)
find_package(CUDAToolkit REQUIRED)
enable_language(CUDA)
set(STIR_WITH_CUDA TRUE)
endif()

if(@STIR_WITH_Parallelproj_PROJECTOR@)
find_package(parallelproj REQUIRED CONFIG)
set(STIR_WITH_Parallelproj_PROJECTOR TRUE)
Expand Down
2 changes: 2 additions & 0 deletions src/cmake/STIRConfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ namespace stir {

#cmakedefine STIR_WITH_NiftyPET_PROJECTOR

#cmakedefine STIR_WITH_CUDA

#cmakedefine STIR_WITH_Parallelproj_PROJECTOR
#cmakedefine parallelproj_built_with_CUDA

Expand Down
5 changes: 4 additions & 1 deletion src/include/stir/NumericVectorWithOffset.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ START_NAMESPACE_STIR
template <class T, class elemT>
class NumericVectorWithOffset : public VectorWithOffset<T>
{
private:
#ifdef SWIG
public: // needs to be public for SWIG to be able to parse the "using" statement below
#endif

typedef VectorWithOffset<T> base_type;

public:
Expand Down
12 changes: 10 additions & 2 deletions src/include/stir/RegisteredParsingObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ template <typename Derived, typename Base, typename Parent = Base>
class RegisteredParsingObject : public Parent
{
public:
// import constructors from Parent
// Note: disabled for older SWIG as that generates an error before 4.2, and a warning for 4.2
#if !defined(SWIG) || (SWIG_VERSION >= 0x040300)
using Parent::Parent;
#endif

//! Construct a new object (of type Derived) by parsing the istream
/*! When the istream * is 0, questions are asked interactively.
Expand All @@ -93,6 +99,7 @@ class RegisteredParsingObject : public Parent
inline std::string parameter_info() override;

public:
#ifndef SWIG
//! A helper class to allow automatic registration.
struct RegisterIt
{
Expand All @@ -111,18 +118,19 @@ class RegisteredParsingObject : public Parent
*/
~RegisterIt()
{
#if 0
# if 0
// does not work yet, as registry might be destructed before this
// RegisterIt object. A solution to this problem is coming up.
cerr << "In RegisterIt destructor for " << Derived::registered_name<<endl;
cerr <<"Current keys: ";
Parent::registry().list_keys(cerr);
Parent::registry().remove_from_registry(Derived::registered_name);
#endif
# endif
}
};
// RegisterIt needs to be a friend to have access to registry()
friend struct RegisterIt;
#endif
};

END_NAMESPACE_STIR
Expand Down
69 changes: 69 additions & 0 deletions src/include/stir/cuda_utilities.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
Copyright (C) 2024, University College London
This file is part of STIR.
SPDX-License-Identifier: Apache-2.0
See STIR/LICENSE.txt for details
*/

#ifndef __stir_cuda_utilities_H__
#define __stir_cuda_utilities_H__

/*!
\file
\ingroup CUDA
\brief some utilities for STIR and CUDA
\author Kris Thielemans
*/
#include "stir/Array.h"
#include "stir/info.h"
#include <vector>

START_NAMESPACE_STIR

template <int num_dimensions, typename elemT>
inline void
array_to_device(elemT* dev_data, const Array<num_dimensions, elemT>& stir_array)
{
if (stir_array.is_contiguous())
{
info("array_to_device contiguous", 100);
cudaMemcpy(dev_data, stir_array.get_const_full_data_ptr(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
stir_array.release_const_full_data_ptr();
}
else
{
info("array_to_device non-contiguous", 100);
// Allocate host memory to get contiguous vector, copy array to it and copy from device to host
std::vector<elemT> tmp_data(stir_array.size_all());
std::copy(stir_array.begin_all(), stir_array.end_all(), tmp_data.begin());
cudaMemcpy(dev_data, tmp_data.data(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
}
}

template <int num_dimensions, typename elemT>
inline void
array_to_host(Array<num_dimensions, elemT>& stir_array, const elemT* dev_data)
{
if (stir_array.is_contiguous())
{
info("array_to_host contiguous", 100);
cudaMemcpy(stir_array.get_full_data_ptr(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
stir_array.release_full_data_ptr();
}
else
{
info("array_to_host non-contiguous", 100);
// Allocate host memory for the result and copy from device to host
std::vector<elemT> tmp_data(stir_array.size_all());
cudaMemcpy(tmp_data.data(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
// Copy the data to the stir_array
std::copy(tmp_data.begin(), tmp_data.end(), stir_array.begin_all());
}
}

END_NAMESPACE_STIR

#endif
5 changes: 5 additions & 0 deletions src/include/stir/doxygengroups.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ be selected at run-time.
\ingroup buildblock
*/

/*!
\defgroup CUDA Items relating to CUDA and STIR buildblock objects.
\ingroup buildblock
*/

/*!
\defgroup data_buildblock Acquisition data building blocks
\ingroup STIR_library
Expand Down
Loading

0 comments on commit b153fda

Please sign in to comment.