Skip to content

Commit

Permalink
added OpenMP support for host-side loops
Browse files Browse the repository at this point in the history
 see ComputationalRadiationPhysics#51

 Some observations:
  - we have only trivial (and fast) loops
  - the other loops are integral steps of the simulation and can not be
    parallelized (sequential steps and maybe with device-code)
  - one of the parsing loops uses cudaSetDevice, not sure if it's
    possible to parallelize that in a good way.
  - parallelizing std::vector is ok as long as the length is fixed (no
    reallocation). That means, we may not use vector.push_back() or
    vector.insert() inside a loop with OpenMP pragmas. Compiler might
    not complain...

 Only "easy" loops were parallelized, only basic pragmas were used.
 Might give some speedup one day, but if not... no problem. Pragmas and
 code changes are non-intrusive enough to keep it maintainable.
  • Loading branch information
slizzered committed Jun 8, 2015
1 parent 9e9e59a commit 91625ac
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 39 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT})
################################################################################
# GNU
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
Expand Down
86 changes: 47 additions & 39 deletions src/interpolation.cu → src/interpolation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <vector>
#include <assert.h>
#include <omp.h> /* pragma omp parallel for */

#include <interpolation.hpp>
#include <logging.hpp>
Expand Down Expand Up @@ -102,53 +103,57 @@ std::vector<double> interpolateLinear(const std::vector<double> y, const std::ve

// Monochromatic case
if(y.size() == 1){
return std::vector<double>(nInterpolations, y.at(0));
return std::vector<double>(nInterpolations, y.at(0));
}

// Check x data if monoton increasing
assert(isStrictlySorted(x));

// Create equidistant interpolation on x axis
std::vector<double> interpolated_x(nInterpolations, 0);

#pragma omp parallel for
for(unsigned i = 0; i < nInterpolations; ++i){
interpolated_x.at(i) = x_min + (i * (x_range / nInterpolations));
interpolated_x.at(i) = x_min + (i * (x_range / nInterpolations));
}

// Start to interpolate y values for every x value
std::vector<double> interpolated_y(nInterpolations, 0);
for(unsigned i = 0; i < interpolated_x.size(); ++i){
// Get index of points before and after x
double y1_i = getNextSmallerIndex(x, interpolated_x.at(i));
double y2_i = getNextBiggerIndex(x, interpolated_x.at(i));
int y_diff = y2_i - y1_i;

if(y_diff == 1){
// First point p1=(x1/y1) before x
double x1 = x_min + y1_i;
double y1 = y.at(y1_i);

// Second point p2=(x2/y2) after x
double x2 = x_min + y2_i;
double y2 = y.at(y2_i);
assert(y.size() >= y1_i);

// linear function between p1 and p2 (y=mx+b)
double m = (y2 - y1) / (x2 / x1);
double b = y1 - (m * x1);

// Interpolate y from linear function
interpolated_y.at(i) = m * interpolated_x.at(i) + b;
#pragma omp parallel for
for(unsigned i = 0; i < nInterpolations; ++i){
// Get index of points before and after x
double y1_i = getNextSmallerIndex(x, interpolated_x.at(i));
double y2_i = getNextBiggerIndex(x, interpolated_x.at(i));
int y_diff = y2_i - y1_i;

if(y_diff == 1){
// First point p1=(x1/y1) before x
double x1 = x_min + y1_i;
double y1 = y.at(y1_i);

// Second point p2=(x2/y2) after x
double x2 = x_min + y2_i;
double y2 = y.at(y2_i);
assert(y.size() >= y1_i);

// linear function between p1 and p2 (y=mx+b)
double m = (y2 - y1) / (x2 / x1);
double b = y1 - (m * x1);

// Interpolate y from linear function
interpolated_y.at(i) = m * interpolated_x.at(i) + b;

}
else if(y_diff == 2){
// No interpolation needed
interpolated_y.at(i) = y.at(y1_i + 1);
}
else {
dout(V_ERROR) << "Index of smaller and bigger sigma too seperated" << std::endl;
exit(1);
}

}
else if(y_diff == 2){
// No interpolation needed
interpolated_y.at(i) = y.at(y1_i + 1);
}
else {
dout(V_ERROR) << "Index of smaller and bigger sigma too seperated" << std::endl;
exit(0);
}

}

return interpolated_y;
Expand Down Expand Up @@ -184,17 +189,20 @@ std::vector<double> interpolateWavelength(const std::vector<double> sigma_y, con
}

std::vector<double> y(interpolation_range, 0);
const double lambda_range = lambda_stop - lambda_start;
const unsigned lambda_range = lambda_stop - lambda_start;
assert(sigma_y.size() >= lambda_range);

// Generate sigma_x
std::vector<double> sigma_x;
for(unsigned i = lambda_start; i <= lambda_stop; ++i){
sigma_x.push_back(i);
std::vector<double> sigma_x(lambda_range, 0);

#pragma omp parallel for
for(unsigned i = 0; i < lambda_range; ++i){
sigma_x[i] = i+lambda_start;
}

#pragma omp parallel for
for(unsigned i = 0; i < interpolation_range; ++i){
double x = lambda_start + (i * (lambda_range / interpolation_range));
double x = lambda_start + (i * (static_cast<double>(lambda_range) / interpolation_range));

// Get index of points before and after x
double y1_i = getNextSmallerIndex(sigma_x, x);
Expand Down Expand Up @@ -225,7 +233,7 @@ std::vector<double> interpolateWavelength(const std::vector<double> sigma_y, con
}
else {
dout(V_ERROR) << "Index of smaller and bigger sigma too seperated" << std::endl;
exit(0);
exit(1);
}

}
Expand Down
4 changes: 4 additions & 0 deletions src/parser.cu → src/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <vector> /* vector */
#include <assert.h>
#include <stdlib.h> /* exit() */
#include <omp.h> /* pragma omp parallel for */

#include <logging.hpp>
#include <mesh.hpp>
Expand Down Expand Up @@ -331,6 +332,7 @@ Mesh createMesh(const std::vector<unsigned> &triangleIndices,
// GPU variables
double totalSurface = 0.;

#pragma omp parallel for reduction(+:totalSurface)
for(unsigned i=0;i<numberOfTriangles;++i){
totalSurface+=double(surfacesVector.at(i));
}
Expand All @@ -341,6 +343,8 @@ Mesh createMesh(const std::vector<unsigned> &triangleIndices,
std::vector<double> hostCenters(xOfTriangleCenter.begin(), xOfTriangleCenter.end());
hostCenters.insert(hostCenters.end(),yOfTriangleCenter.begin(),yOfTriangleCenter.end());
std::vector<float> totalReflectionAngles(refractiveIndices.size()/2,0);

#pragma omp parallel for
for(unsigned i=0;i<refractiveIndices.size();i+=2){
totalReflectionAngles.at(i/2) = (180. / M_PI * asin(refractiveIndices.at(i+1) / refractiveIndices.at(i)));
}
Expand Down
2 changes: 2 additions & 0 deletions src/write_to_vtk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <iomanip> /* std::fixed, std::setprecision() */
#include <string> /* std::to_string() */
#include <time.h> /* time, time_t */
#include <omp.h> /* pragma omp parallel for */

#include <boost/filesystem.hpp> /* fs::path */
#include <boost/filesystem/fstream.hpp> /* fs::ofstream, fs::ifstream */
Expand Down Expand Up @@ -195,6 +196,7 @@ std::vector<double> compareVtk(std::vector<double> compare, const fs::path filen
}
dout(V_INFO) << "Compare solution with " << filename << std::endl;

#pragma omp parallel for reduction (+:aseTotal)
for(unsigned i = 0; i < compare.size(); ++i){
aseTotal += compare.at(i);
}
Expand Down

0 comments on commit 91625ac

Please sign in to comment.