diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
new file mode 100644
index 000000000..7a805dce4
--- /dev/null
+++ b/.github/workflows/cuda.yml
@@ -0,0 +1,78 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt)
+
+name: GPUs
+on:
+  push:
+    branches:
+      - master
+      - develop
+      - feature/**
+  pull_request:
+  release:
+    types: [published, created, edited]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}  # keyed per workflow so other workflows on the same PR are not cancelled
+  cancel-in-progress: true
+
+jobs:
+  cuda-cmake-test:
+    strategy:
+      fail-fast: false
+
+    runs-on: gpu-runner-1
+
+    steps:
+      - uses: Jimver/cuda-toolkit@v0.2.16
+        id: cuda-toolkit
+        with:
+          cuda: '12.5.0'
+          method: 'network'
+          sub-packages: '["nvcc"]'
+
+      - name: Output CUDA information
+        run: |
+          echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"
+          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+          nvcc -V
+      - uses: actions/checkout@v4
+
+      - name: Install Packages
+        run: |
+          sudo apt-get update && sudo apt-get install -y cmake make
+      - name: Setup Boost
+        run: |
+          echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
+          LIBRARY=${GITHUB_REPOSITORY#*/}
+          echo LIBRARY: $LIBRARY
+          echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
+          echo GITHUB_BASE_REF: $GITHUB_BASE_REF
+          echo GITHUB_REF: $GITHUB_REF
+          REF=${GITHUB_BASE_REF:-$GITHUB_REF}
+          REF=${REF#refs/heads/}
+          echo REF: $REF
+          BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
+          echo BOOST_BRANCH: $BOOST_BRANCH
+          cd ..
+          git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
+          cd boost-root
+          mkdir -p libs/$LIBRARY
+          cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
+          git submodule update --init tools/boostdep
+          python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY
+      - name: Configure
+        run: |
+          cd ../boost-root
+          mkdir __build__ && cd __build__
+          cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_DECIMAL_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.5 ..
+      - name: Build tests
+        run: |
+          cd ../boost-root/__build__
+          cmake --build . --target tests -j $(nproc)
+      - name: Run tests
+        run: |
+          cd ../boost-root/__build__
+          ctest --output-on-failure --no-tests=error
+          
diff --git a/include/boost/decimal/decimal32_fast.hpp b/include/boost/decimal/decimal32_fast.hpp
index 8ac613880..b90581c7c 100644
--- a/include/boost/decimal/decimal32_fast.hpp
+++ b/include/boost/decimal/decimal32_fast.hpp
@@ -6,6 +6,8 @@
 #define BOOST_DECIMAL_DECIMAL32_FAST_HPP
 
 #include <boost/decimal/decimal32.hpp>
+#include <boost/decimal/detail/config.hpp>
+#include <boost/decimal/detail/numeric_limits.hpp>
 #include <boost/decimal/detail/apply_sign.hpp>
 #include <boost/decimal/detail/type_traits.hpp>
 #include <boost/decimal/detail/integer_search_trees.hpp>
@@ -16,7 +18,7 @@
 #include <boost/decimal/detail/div_impl.hpp>
 #include <boost/decimal/detail/promote_significand.hpp>
 #include <boost/decimal/detail/ryu/ryu_generic_128.hpp>
-#include <limits>
+#include <boost/decimal/detail/numeric_limits.hpp>
 #include <cstdint>
 
 namespace boost {
@@ -24,9 +26,9 @@ namespace decimal {
 
 namespace detail {
 
-BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_inf = std::numeric_limits<std::uint_fast32_t>::max();
-BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_qnan = std::numeric_limits<std::uint_fast32_t>::max() - 1;
-BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_snan = std::numeric_limits<std::uint_fast32_t>::max() - 2;
+BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_inf = boost::decimal::detail::numeric_limits<std::uint_fast32_t>::max();
+BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_qnan = boost::decimal::detail::numeric_limits<std::uint_fast32_t>::max() - 1;
+BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_snan = boost::decimal::detail::numeric_limits<std::uint_fast32_t>::max() - 2;
 
 struct decimal32_fast_components
 {
@@ -56,302 +58,302 @@ class decimal32_fast final
     exponent_type exponent_ {};
     bool sign_ {};
 
-    constexpr auto isneg() const noexcept -> bool
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto isneg() const noexcept -> bool
     {
         return sign_;
     }
 
-    constexpr auto full_significand() const noexcept -> significand_type
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto full_significand() const noexcept -> significand_type
     {
         return significand_;
     }
 
-    constexpr auto unbiased_exponent() const noexcept -> exponent_type
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto unbiased_exponent() const noexcept -> exponent_type
     {
         return exponent_;
     }
 
-    constexpr auto biased_exponent() const noexcept -> biased_exponent_type
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto biased_exponent() const noexcept -> biased_exponent_type
     {
         return static_cast<biased_exponent_type>(exponent_) - detail::bias_v<decimal32>;
     }
 
-    friend constexpr auto div_impl(decimal32_fast lhs, decimal32_fast rhs, decimal32_fast& q, decimal32_fast& r) noexcept -> void;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto div_impl(decimal32_fast lhs, decimal32_fast rhs, decimal32_fast& q, decimal32_fast& r) noexcept -> void;
 
-    friend constexpr auto mod_impl(decimal32_fast lhs, decimal32_fast rhs, const decimal32_fast& q, decimal32_fast& r) noexcept -> void;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto mod_impl(decimal32_fast lhs, decimal32_fast rhs, const decimal32_fast& q, decimal32_fast& r) noexcept -> void;
 
     // Attempts conversion to integral type:
     // If this is nan sets errno to EINVAL and returns 0
     // If this is not representable sets errno to ERANGE and returns 0
     template <typename Decimal, typename TargetType>
-    friend constexpr auto to_integral(Decimal val) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto to_integral(Decimal val) noexcept
         BOOST_DECIMAL_REQUIRES_TWO_RETURN(detail::is_decimal_floating_point_v, Decimal, detail::is_integral_v, TargetType, TargetType);
 
     template <typename Decimal, typename TargetType>
-    friend BOOST_DECIMAL_CXX20_CONSTEXPR auto to_float(Decimal val) noexcept
+    friend BOOST_DECIMAL_CXX20_CONSTEXPR BOOST_DECIMAL_GPU_ENABLED auto to_float(Decimal val) noexcept
         BOOST_DECIMAL_REQUIRES_TWO_RETURN(detail::is_decimal_floating_point_v, Decimal, detail::is_floating_point_v, TargetType, TargetType);
 
     template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE TargetType, BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Decimal>
-    friend constexpr auto to_decimal(Decimal val) noexcept -> TargetType;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto to_decimal(Decimal val) noexcept -> TargetType;
 
     // Equality template between any integer type and decimal32
     template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Decimal, BOOST_DECIMAL_INTEGRAL Integer>
-    friend constexpr auto mixed_equality_impl(Decimal lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto mixed_equality_impl(Decimal lhs, Integer rhs) noexcept
         -> std::enable_if_t<(detail::is_decimal_floating_point_v<Decimal> && detail::is_integral_v<Integer>), bool>;
 
     template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Decimal1, BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Decimal2>
-    friend constexpr auto mixed_decimal_equality_impl(Decimal1 lhs, Decimal2 rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto mixed_decimal_equality_impl(Decimal1 lhs, Decimal2 rhs) noexcept
         -> std::enable_if_t<(detail::is_decimal_floating_point_v<Decimal1> &&
                              detail::is_decimal_floating_point_v<Decimal2>), bool>;
 
     // Template to compare operator< for any integer type and decimal32
     template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Decimal, BOOST_DECIMAL_INTEGRAL Integer>
-    friend constexpr auto less_impl(Decimal lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto less_impl(Decimal lhs, Integer rhs) noexcept
         -> std::enable_if_t<(detail::is_decimal_floating_point_v<Decimal> && detail::is_integral_v<Integer>), bool>;
 
     template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Decimal1, BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Decimal2>
-    friend constexpr auto mixed_decimal_less_impl(Decimal1 lhs, Decimal2 rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto mixed_decimal_less_impl(Decimal1 lhs, Decimal2 rhs) noexcept
         -> std::enable_if_t<(detail::is_decimal_floating_point_v<Decimal1> &&
                              detail::is_decimal_floating_point_v<Decimal2>), bool>;
 
 public:
-    constexpr decimal32_fast() noexcept {}
+    BOOST_DECIMAL_GPU_ENABLED constexpr decimal32_fast() noexcept {}
 
     template <typename T1, typename T2, std::enable_if_t<detail::is_integral_v<T1> && detail::is_integral_v<T2>, bool> = true>
-    constexpr decimal32_fast(T1 coeff, T2 exp, bool sign = false) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED constexpr decimal32_fast(T1 coeff, T2 exp, bool sign = false) noexcept;
 
     template <typename Integer, std::enable_if_t<detail::is_integral_v<Integer>, bool> = true>
-    constexpr decimal32_fast(Integer coeff) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED constexpr decimal32_fast(Integer coeff) noexcept;
 
     template <typename Float, std::enable_if_t<detail::is_floating_point_v<Float>, bool> = true>
-    explicit BOOST_DECIMAL_CXX20_CONSTEXPR decimal32_fast(Float val) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit BOOST_DECIMAL_CXX20_CONSTEXPR decimal32_fast(Float val) noexcept;
 
     constexpr decimal32_fast(const decimal32_fast& val) noexcept = default;
     constexpr decimal32_fast(decimal32_fast&& val) noexcept = default;
-    constexpr auto operator=(const decimal32_fast& val) noexcept -> decimal32_fast& = default;
-    constexpr auto operator=(decimal32_fast&& val) noexcept -> decimal32_fast& = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator=(const decimal32_fast& val) noexcept -> decimal32_fast& = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator=(decimal32_fast&& val) noexcept -> decimal32_fast& = default;
 
     // cmath functions that are easier as friends
-    friend constexpr auto signbit(decimal32_fast val) noexcept -> bool;
-    friend constexpr auto isinf(decimal32_fast val) noexcept -> bool;
-    friend constexpr auto isnan(decimal32_fast val) noexcept -> bool;
-    friend constexpr auto issignaling(decimal32_fast val) noexcept -> bool;
-    friend constexpr auto isnormal(decimal32_fast val) noexcept -> bool;
-    friend constexpr auto isfinite(decimal32_fast val) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto signbit(decimal32_fast val) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto isinf(decimal32_fast val) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto isnan(decimal32_fast val) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto issignaling(decimal32_fast val) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto isnormal(decimal32_fast val) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto isfinite(decimal32_fast val) noexcept -> bool;
 
     // Comparison operators
-    friend constexpr auto operator==(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
-    friend constexpr auto operator!=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
-    friend constexpr auto operator<(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
-    friend constexpr auto operator<=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
-    friend constexpr auto operator>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
-    friend constexpr auto operator>=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
 
     // Mixed comparisons
     template <typename Integer>
-    friend constexpr auto operator==(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator==(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator!=(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator!=(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator<(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator<(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator<=(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator<=(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator>(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator>(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator>=(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     template <typename Integer>
-    friend constexpr auto operator>=(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool);
 
     #ifdef BOOST_DECIMAL_HAS_SPACESHIP_OPERATOR
 
-    friend constexpr auto operator<=>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> std::partial_ordering;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> std::partial_ordering;
 
     template <typename Integer>
-    friend constexpr auto operator<=>(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=>(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, std::partial_ordering);
 
     template <typename Integer>
-    friend constexpr auto operator<=>(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=>(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, std::partial_ordering);
 
     #endif
 
     // Unary operators
-    friend constexpr auto operator+(decimal32_fast rhs) noexcept -> decimal32_fast;
-    friend constexpr auto operator-(decimal32_fast lhs) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(decimal32_fast rhs) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(decimal32_fast lhs) noexcept -> decimal32_fast;
 
     // Binary arithmetic
-    friend constexpr auto operator+(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
-    friend constexpr auto operator-(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
-    friend constexpr auto operator*(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
-    friend constexpr auto operator/(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
-    friend constexpr auto operator%(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator%(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
 
     // Mixed type binary arithmetic
     template <typename Integer>
-    friend constexpr auto operator+(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast);
 
     template <typename Integer>
-    friend constexpr auto operator+(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast);
 
     template <typename Integer>
-    friend constexpr auto operator-(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast);
 
     template <typename Integer>
-    friend constexpr auto operator-(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast);
 
     template <typename Integer>
-    friend constexpr auto operator*(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast);
 
     template <typename Integer>
-    friend constexpr auto operator*(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast);
 
     template <typename Integer>
-    friend constexpr auto operator/(decimal32_fast lhs, Integer rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/(decimal32_fast lhs, Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast);
 
     template <typename Integer>
-    friend constexpr auto operator/(Integer lhs, decimal32_fast rhs) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/(Integer lhs, decimal32_fast rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast);
 
     // Compound operators
-    constexpr auto operator+=(decimal32_fast rhs) noexcept -> decimal32_fast&;
-    constexpr auto operator-=(decimal32_fast rhs) noexcept -> decimal32_fast&;
-    constexpr auto operator*=(decimal32_fast rhs) noexcept -> decimal32_fast&;
-    constexpr auto operator/=(decimal32_fast rhs) noexcept -> decimal32_fast&;
-    constexpr auto operator%=(decimal32_fast rhs) noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+=(decimal32_fast rhs) noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-=(decimal32_fast rhs) noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*=(decimal32_fast rhs) noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/=(decimal32_fast rhs) noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator%=(decimal32_fast rhs) noexcept -> decimal32_fast&;
 
     // Mixed type compound operators
     template <typename Integer>
-    constexpr auto operator+=(Integer rhs) noexcept
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+=(Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast&);
 
     template <typename Integer>
-    constexpr auto operator-=(Integer rhs) noexcept
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-=(Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast&);
 
     template <typename Integer>
-    constexpr auto operator*=(Integer rhs) noexcept
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*=(Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast&);
 
     template <typename Integer>
-    constexpr auto operator/=(Integer rhs) noexcept
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/=(Integer rhs) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast&);
 
     // Increment and decrement
-    constexpr auto operator++() noexcept -> decimal32_fast&;
-    constexpr auto operator++(int) noexcept -> decimal32_fast&;
-    constexpr auto operator--() noexcept -> decimal32_fast&;
-    constexpr auto operator--(int) noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator++() noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator++(int) noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator--() noexcept -> decimal32_fast&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator--(int) noexcept -> decimal32_fast&;
 
     // 3.2.2.4 Conversion to integral type
-    explicit constexpr operator bool() const noexcept;
-    explicit constexpr operator int() const noexcept;
-    explicit constexpr operator unsigned() const noexcept;
-    explicit constexpr operator long() const noexcept;
-    explicit constexpr operator unsigned long() const noexcept;
-    explicit constexpr operator long long() const noexcept;
-    explicit constexpr operator unsigned long long() const noexcept;
-    explicit constexpr operator std::int8_t() const noexcept;
-    explicit constexpr operator std::uint8_t() const noexcept;
-    explicit constexpr operator std::int16_t() const noexcept;
-    explicit constexpr operator std::uint16_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator bool() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator int() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator unsigned() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator long() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator unsigned long() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator long long() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator unsigned long long() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator std::int8_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator std::uint8_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator std::int16_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator std::uint16_t() const noexcept;
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    explicit constexpr operator detail::int128_t() const noexcept;
-    explicit constexpr operator detail::uint128_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator detail::int128_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator detail::uint128_t() const noexcept;
     #endif
 
     // 3.2.6 Conversion to floating-point type
-    explicit BOOST_DECIMAL_CXX20_CONSTEXPR operator float() const noexcept;
-    explicit BOOST_DECIMAL_CXX20_CONSTEXPR operator double() const noexcept;
-    explicit BOOST_DECIMAL_CXX20_CONSTEXPR operator long double() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit BOOST_DECIMAL_CXX20_CONSTEXPR operator float() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit BOOST_DECIMAL_CXX20_CONSTEXPR operator double() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit BOOST_DECIMAL_CXX20_CONSTEXPR operator long double() const noexcept;
 
     #ifdef BOOST_DECIMAL_HAS_FLOAT16
-    explicit constexpr operator std::float16_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator std::float16_t() const noexcept;
     #endif
     #ifdef BOOST_DECIMAL_HAS_FLOAT32
-    explicit constexpr operator std::float32_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator std::float32_t() const noexcept;
     #endif
     #ifdef BOOST_DECIMAL_HAS_FLOAT64
-    explicit constexpr operator std::float64_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator std::float64_t() const noexcept;
     #endif
     #ifdef BOOST_DECIMAL_HAS_BRAINFLOAT16
-    explicit constexpr operator std::bfloat16_t() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator std::bfloat16_t() const noexcept;
     #endif
 
 
     // Conversion to other decimal type
     template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Decimal, std::enable_if_t<detail::is_decimal_floating_point_v<Decimal>, bool> = true>
-    explicit constexpr operator Decimal() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator Decimal() const noexcept;
 
-    friend constexpr auto direct_init(std::uint_fast32_t significand, std::uint_fast8_t exponent, bool sign) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto direct_init(std::uint_fast32_t significand, std::uint_fast8_t exponent, bool sign) noexcept -> decimal32_fast;
 
     // <cmath> or extensions that need to be friends
     template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
-    friend constexpr auto frexp10(T num, int* expptr) noexcept -> typename T::significand_type;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto frexp10(T num, int* expptr) noexcept -> typename T::significand_type;
 
-    friend constexpr auto copysignd32f(decimal32_fast mag, decimal32_fast sgn) noexcept -> decimal32_fast;
-    friend constexpr auto scalbnd32f(decimal32_fast num, int exp) noexcept -> decimal32_fast;
-    friend constexpr auto scalblnd32f(decimal32_fast num, long exp) noexcept -> decimal32_fast;
-    friend constexpr auto fmad32f(decimal32_fast x, decimal32_fast y, decimal32_fast z) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto copysignd32f(decimal32_fast mag, decimal32_fast sgn) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto scalbnd32f(decimal32_fast num, int exp) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto scalblnd32f(decimal32_fast num, long exp) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto fmad32f(decimal32_fast x, decimal32_fast y, decimal32_fast z) noexcept -> decimal32_fast;
 
     template <typename T>
-    friend constexpr auto ilogb(T d) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto ilogb(T d) noexcept
         BOOST_DECIMAL_REQUIRES_RETURN(detail::is_decimal_floating_point_v, T, int);
 
     template <typename T>
-    friend constexpr auto logb(T num) noexcept
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto logb(T num) noexcept
         BOOST_DECIMAL_REQUIRES(detail::is_decimal_floating_point_v, T);
 
     // Specific decimal functionality
-    friend constexpr auto samequantumd32f(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
-    friend constexpr auto quantexpd32f(decimal32_fast x) noexcept -> int;
-    friend constexpr auto quantized32f(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto samequantumd32f(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto quantexpd32f(decimal32_fast x) noexcept -> int;
+    friend BOOST_DECIMAL_GPU_ENABLED constexpr auto quantized32f(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast;
 };
 
 template <typename T1, typename T2, std::enable_if_t<detail::is_integral_v<T1> && detail::is_integral_v<T2>, bool>>
-constexpr decimal32_fast::decimal32_fast(T1 coeff, T2 exp, bool sign) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr decimal32_fast::decimal32_fast(T1 coeff, T2 exp, bool sign) noexcept
 {
     // Older compilers have issues with conversions from __uint128, so we skip all that and use our uint128
     #if defined(BOOST_DECIMAL_HAS_INT128) && (!defined(__GNUC__) || (defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 10)) && (!defined(__clang__) || (defined(__clang__) && __clang_major__ < 13))
@@ -392,7 +394,7 @@ constexpr decimal32_fast::decimal32_fast(T1 coeff, T2 exp, bool sign) noexcept
 }
 
 template <typename Integer, std::enable_if_t<detail::is_integral_v<Integer>, bool>>
-constexpr decimal32_fast::decimal32_fast(Integer val) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr decimal32_fast::decimal32_fast(Integer val) noexcept
 {
     using ConversionType = std::conditional_t<std::is_same<Integer, bool>::value, std::int32_t, Integer>;
     *this = decimal32_fast{static_cast<ConversionType>(val), 0, false};
@@ -407,7 +409,7 @@ constexpr decimal32_fast::decimal32_fast(Integer val) noexcept
 #endif
 
 template <typename Float, std::enable_if_t<detail::is_floating_point_v<Float>, bool>>
-BOOST_DECIMAL_CXX20_CONSTEXPR decimal32_fast::decimal32_fast(Float val) noexcept
+BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_CXX20_CONSTEXPR decimal32_fast::decimal32_fast(Float val) noexcept
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     if (val != val)
@@ -432,7 +434,7 @@ BOOST_DECIMAL_CXX20_CONSTEXPR decimal32_fast::decimal32_fast(Float val) noexcept
 #  pragma GCC diagnostic pop
 #endif
 
-constexpr auto direct_init(std::uint_fast32_t significand, std::uint_fast8_t exponent, bool sign = false) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto direct_init(std::uint_fast32_t significand, std::uint_fast8_t exponent, bool sign = false) noexcept -> decimal32_fast
 {
     decimal32_fast val;
     val.significand_ = significand;
@@ -442,37 +444,37 @@ constexpr auto direct_init(std::uint_fast32_t significand, std::uint_fast8_t exp
     return val;
 }
 
-constexpr auto signbit(decimal32_fast val) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto signbit(decimal32_fast val) noexcept -> bool
 {
     return val.sign_;
 }
 
-constexpr auto isinf(decimal32_fast val) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto isinf(decimal32_fast val) noexcept -> bool
 {
     return val.significand_ == detail::d32_fast_inf;
 }
 
-constexpr auto isnan(decimal32_fast val) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto isnan(decimal32_fast val) noexcept -> bool
 {
     return val.significand_ == detail::d32_fast_qnan || val.significand_ == detail::d32_fast_snan;
 }
 
-constexpr auto issignaling(decimal32_fast val) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto issignaling(decimal32_fast val) noexcept -> bool
 {
     return val.significand_ == detail::d32_fast_snan;
 }
 
-constexpr auto isnormal(decimal32_fast val) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto isnormal(decimal32_fast val) noexcept -> bool
 {
     return (val.significand_ != 0) && isfinite(val) && (val.exponent_ > static_cast<std::uint8_t>(detail::precision_v<decimal32> - 1));
 }
 
-constexpr auto isfinite(decimal32_fast val) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto isfinite(decimal32_fast val) noexcept -> bool
 {
     return val.significand_ < detail::d32_fast_snan;
 }
 
-constexpr auto operator==(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
 {
     return
            #ifndef BOOST_DECIMAL_FAST_MATH
@@ -483,7 +485,7 @@ constexpr auto operator==(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bo
            (lhs.significand_ == rhs.significand_);
 }
 
-constexpr auto operator!=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
 {
     return
             #ifndef BOOST_DECIMAL_FAST_MATH
@@ -494,7 +496,7 @@ constexpr auto operator!=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bo
             (lhs.significand_ != rhs.significand_);
 }
 
-constexpr auto operator<(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     if (!isfinite(lhs) || !isfinite(rhs))
@@ -523,7 +525,7 @@ constexpr auto operator<(decimal32_fast lhs, decimal32_fast rhs) noexcept -> boo
                                      rhs.significand_, rhs.biased_exponent(), rhs.sign_);
 }
 
-constexpr auto operator<=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     if (!isfinite(lhs) || !isfinite(rhs))
@@ -552,12 +554,12 @@ constexpr auto operator<=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bo
                                       lhs.significand_, lhs.biased_exponent(), lhs.sign_);
 }
 
-constexpr auto operator>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
 {
     return rhs < lhs;
 }
 
-constexpr auto operator>=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     if (!isfinite(lhs) || !isfinite(rhs))
@@ -586,42 +588,42 @@ constexpr auto operator>=(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bo
 }
 
 template <typename Integer>
-constexpr auto operator==(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     return mixed_equality_impl(lhs, rhs);
 }
 
 template <typename Integer>
-constexpr auto operator==(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     return mixed_equality_impl(rhs, lhs);
 }
 
 template <typename Integer>
-constexpr auto operator!=(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     return !(lhs == rhs);
 }
 
 template <typename Integer>
-constexpr auto operator!=(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     return !(lhs == rhs);
 }
 
 template <typename Integer>
-constexpr auto operator<(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     return less_impl(lhs, rhs);
 }
 
 template <typename Integer>
-constexpr auto operator<(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
@@ -632,7 +634,7 @@ constexpr auto operator<(Integer lhs, decimal32_fast rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator<=(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
@@ -643,7 +645,7 @@ constexpr auto operator<=(decimal32_fast lhs, Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator<=(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
@@ -654,7 +656,7 @@ constexpr auto operator<=(Integer lhs, decimal32_fast rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator>(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
@@ -665,7 +667,7 @@ constexpr auto operator>(decimal32_fast lhs, Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator>(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
@@ -676,7 +678,7 @@ constexpr auto operator>(Integer lhs, decimal32_fast rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator>=(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
@@ -687,7 +689,7 @@ constexpr auto operator>=(decimal32_fast lhs, Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator>=(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, bool)
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
@@ -699,7 +701,7 @@ constexpr auto operator>=(Integer lhs, decimal32_fast rhs) noexcept
 
 #ifdef BOOST_DECIMAL_HAS_SPACESHIP_OPERATOR
 
-constexpr auto operator<=>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> std::partial_ordering
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> std::partial_ordering
 {
     if (lhs < rhs)
     {
@@ -718,7 +720,7 @@ constexpr auto operator<=>(decimal32_fast lhs, decimal32_fast rhs) noexcept -> s
 }
 
 template <typename Integer>
-constexpr auto operator<=>(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=>(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, std::partial_ordering)
 {
     if (lhs < rhs)
@@ -738,7 +740,7 @@ constexpr auto operator<=>(decimal32_fast lhs, Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator<=>(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=>(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, std::partial_ordering)
 {
     if (lhs < rhs)
@@ -759,18 +761,18 @@ constexpr auto operator<=>(Integer lhs, decimal32_fast rhs) noexcept
 
 #endif
 
-constexpr auto operator+(decimal32_fast rhs) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(decimal32_fast rhs) noexcept -> decimal32_fast
 {
     return rhs;
 }
 
-constexpr auto operator-(decimal32_fast rhs) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(decimal32_fast rhs) noexcept -> decimal32_fast
 {
     rhs.sign_ = !rhs.sign_;
     return rhs;
 }
 
-constexpr auto operator+(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     if (!isfinite(lhs) || !isfinite(rhs))
@@ -786,7 +788,7 @@ constexpr auto operator+(decimal32_fast lhs, decimal32_fast rhs) noexcept -> dec
 }
 
 template <typename Integer>
-constexpr auto operator+(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast)
 {
     using promoted_significand_type = detail::promote_significand_t<decimal32_fast, Integer>;
@@ -812,13 +814,13 @@ constexpr auto operator+(decimal32_fast lhs, Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator+(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast)
 {
     return rhs + lhs;
 }
 
-constexpr auto operator-(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     if (!isfinite(lhs) || !isfinite(rhs))
@@ -835,7 +837,7 @@ constexpr auto operator-(decimal32_fast lhs, decimal32_fast rhs) noexcept -> dec
 }
 
 template <typename Integer>
-constexpr auto operator-(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast)
 {
     using promoted_significand_type = detail::promote_significand_t<decimal32_fast, Integer>;
@@ -863,7 +865,7 @@ constexpr auto operator-(decimal32_fast lhs, Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator-(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast)
 {
     using promoted_significand_type = detail::promote_significand_t<decimal32_fast, Integer>;
@@ -890,7 +892,7 @@ constexpr auto operator-(Integer lhs, decimal32_fast rhs) noexcept
     );
 }
 
-constexpr auto operator*(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     if (!isfinite(lhs) || !isfinite(rhs))
@@ -906,7 +908,7 @@ constexpr auto operator*(decimal32_fast lhs, decimal32_fast rhs) noexcept -> dec
 }
 
 template <typename Integer>
-constexpr auto operator*(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast)
 {
     using promoted_significand_type = detail::promote_significand_t<decimal32_fast, Integer>;
@@ -932,13 +934,13 @@ constexpr auto operator*(decimal32_fast lhs, Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator*(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast)
 {
     return rhs * lhs;
 }
 
-constexpr auto div_impl(decimal32_fast lhs, decimal32_fast rhs, decimal32_fast& q, decimal32_fast& r) noexcept -> void
+BOOST_DECIMAL_GPU_ENABLED constexpr auto div_impl(decimal32_fast lhs, decimal32_fast rhs, decimal32_fast& q, decimal32_fast& r) noexcept -> void
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     const bool sign {lhs.isneg() != rhs.isneg()};
@@ -1004,7 +1006,7 @@ constexpr auto div_impl(decimal32_fast lhs, decimal32_fast rhs, decimal32_fast&
     q = decimal32_fast(res_sig, res_exp, lhs.sign_ != rhs.sign_);
 }
 
-constexpr auto mod_impl(decimal32_fast lhs, decimal32_fast rhs, const decimal32_fast& q, decimal32_fast& r) noexcept -> void
+BOOST_DECIMAL_GPU_ENABLED constexpr auto mod_impl(decimal32_fast lhs, decimal32_fast rhs, const decimal32_fast& q, decimal32_fast& r) noexcept -> void
 {
     constexpr decimal32_fast zero {0, 0};
 
@@ -1013,7 +1015,7 @@ constexpr auto mod_impl(decimal32_fast lhs, decimal32_fast rhs, const decimal32_
     r = lhs - (decimal32_fast(q_trunc) * rhs);
 }
 
-constexpr auto operator/(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
 {
     decimal32_fast q {};
     decimal32_fast r {};
@@ -1023,7 +1025,7 @@ constexpr auto operator/(decimal32_fast lhs, decimal32_fast rhs) noexcept -> dec
 }
 
 template <typename Integer>
-constexpr auto operator/(decimal32_fast lhs, Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/(decimal32_fast lhs, Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast)
 {
     using exp_type = decimal32_fast::biased_exponent_type;
@@ -1064,7 +1066,7 @@ constexpr auto operator/(decimal32_fast lhs, Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto operator/(Integer lhs, decimal32_fast rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/(Integer lhs, decimal32_fast rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast)
 {
     using exp_type = decimal32_fast::biased_exponent_type;
@@ -1103,7 +1105,7 @@ constexpr auto operator/(Integer lhs, decimal32_fast rhs) noexcept
     return detail::generic_div_impl<decimal32_fast>(lhs_components, rhs_components);
 }
 
-constexpr auto operator%(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator%(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
 {
     decimal32_fast q {};
     decimal32_fast r {};
@@ -1113,38 +1115,38 @@ constexpr auto operator%(decimal32_fast lhs, decimal32_fast rhs) noexcept -> dec
     return r;
 }
 
-constexpr auto decimal32_fast::operator%=(decimal32_fast rhs) noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator%=(decimal32_fast rhs) noexcept -> decimal32_fast&
 {
     *this = *this % rhs;
     return *this;
 }
 
-constexpr auto decimal32_fast::operator+=(decimal32_fast rhs) noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator+=(decimal32_fast rhs) noexcept -> decimal32_fast&
 {
     *this = *this + rhs;
     return *this;
 }
 
-constexpr auto decimal32_fast::operator-=(decimal32_fast rhs) noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator-=(decimal32_fast rhs) noexcept -> decimal32_fast&
 {
     *this = *this - rhs;
     return *this;
 }
 
-constexpr auto decimal32_fast::operator*=(decimal32_fast rhs) noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator*=(decimal32_fast rhs) noexcept -> decimal32_fast&
 {
     *this = *this * rhs;
     return *this;
 }
 
-constexpr auto decimal32_fast::operator/=(decimal32_fast rhs) noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator/=(decimal32_fast rhs) noexcept -> decimal32_fast&
 {
     *this = *this / rhs;
     return *this;
 }
 
 template <typename Integer>
-constexpr auto decimal32_fast::operator+=(Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator+=(Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast&)
 {
     *this = *this + rhs;
@@ -1152,7 +1154,7 @@ constexpr auto decimal32_fast::operator+=(Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto decimal32_fast::operator-=(Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator-=(Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast&)
 {
     *this = *this - rhs;
@@ -1160,7 +1162,7 @@ constexpr auto decimal32_fast::operator-=(Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto decimal32_fast::operator*=(Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator*=(Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast&)
 {
     *this = *this * rhs;
@@ -1168,33 +1170,33 @@ constexpr auto decimal32_fast::operator*=(Integer rhs) noexcept
 }
 
 template <typename Integer>
-constexpr auto decimal32_fast::operator/=(Integer rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator/=(Integer rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_integral_v, Integer, decimal32_fast&)
 {
     *this = *this / rhs;
     return *this;
 }
 
-constexpr auto decimal32_fast::operator++() noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator++() noexcept -> decimal32_fast&
 {
     constexpr decimal32_fast one(1, 0);
     *this = *this + one;
     return *this;
 }
 
-constexpr auto decimal32_fast::operator++(int) noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator++(int) noexcept -> decimal32_fast& // NOTE(review): postfix form forwards to prefix ++ and returns *this, so callers observe the already-incremented value rather than the prior one
 {
     return ++(*this);
 }
 
-constexpr auto decimal32_fast::operator--() noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator--() noexcept -> decimal32_fast&
 {
     constexpr decimal32_fast one(1, 0);
     *this = *this - one;
     return *this;
 }
 
-constexpr auto decimal32_fast::operator--(int) noexcept -> decimal32_fast&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto decimal32_fast::operator--(int) noexcept -> decimal32_fast& // NOTE(review): postfix form forwards to prefix -- and returns *this, so callers observe the already-decremented value rather than the prior one
 {
     return --(*this);
 }
@@ -1316,7 +1318,7 @@ constexpr decimal32_fast::operator Decimal() const noexcept
     return to_decimal<Decimal>(*this);
 }
 
-constexpr auto scalblnd32f(decimal32_fast num, long exp) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto scalblnd32f(decimal32_fast num, long exp) noexcept -> decimal32_fast
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     constexpr decimal32_fast zero {0, 0};
@@ -1332,12 +1334,12 @@ constexpr auto scalblnd32f(decimal32_fast num, long exp) noexcept -> decimal32_f
     return num;
 }
 
-constexpr auto scalbnd32f(decimal32_fast num, int expval) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto scalbnd32f(decimal32_fast num, int expval) noexcept -> decimal32_fast
 {
     return scalblnd32f(num, static_cast<long>(expval));
 }
 
-constexpr auto copysignd32f(decimal32_fast mag, decimal32_fast sgn) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto copysignd32f(decimal32_fast mag, decimal32_fast sgn) noexcept -> decimal32_fast
 {
     mag.sign_ = sgn.sign_;
     return mag;
@@ -1347,7 +1349,7 @@ constexpr auto copysignd32f(decimal32_fast mag, decimal32_fast sgn) noexcept ->
 // If both x and y are NaN, or infinity, they have the same quantum exponents;
 // if exactly one operand is infinity or exactly one operand is NaN, they do not have the same quantum exponents.
 // The samequantum functions raise no exception.
-constexpr auto samequantumd32f(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto samequantumd32f(decimal32_fast lhs, decimal32_fast rhs) noexcept -> bool
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     const auto lhs_fp {fpclassify(lhs)};
@@ -1368,7 +1370,7 @@ constexpr auto samequantumd32f(decimal32_fast lhs, decimal32_fast rhs) noexcept
 
 // Effects: if x is finite, returns its quantum exponent.
 // Otherwise, a domain error occurs and INT_MIN is returned.
-constexpr auto quantexpd32f(decimal32_fast x) noexcept -> int
+BOOST_DECIMAL_GPU_ENABLED constexpr auto quantexpd32f(decimal32_fast x) noexcept -> int
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     if (!isfinite(x))
@@ -1390,7 +1392,7 @@ constexpr auto quantexpd32f(decimal32_fast x) noexcept -> int
 // Otherwise, if only one operand is infinity, the "invalid" floating-point exception is raised and the result is NaN.
 // If both operands are infinity, the result is DEC_INFINITY, with the same sign as x, converted to the type of x.
 // The quantize functions do not signal underflow.
-constexpr auto quantized32f(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
+BOOST_DECIMAL_GPU_ENABLED constexpr auto quantized32f(decimal32_fast lhs, decimal32_fast rhs) noexcept -> decimal32_fast
 {
     #ifndef BOOST_DECIMAL_FAST_MATH
     // Return the correct type of nan
@@ -1464,17 +1466,17 @@ struct numeric_limits<boost::decimal::decimal32_fast>
     BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr bool tinyness_before = true;
 
     // Member functions
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto (min)        () -> boost::decimal::decimal32_fast { return {1, min_exponent}; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto (max)        () -> boost::decimal::decimal32_fast { return {9'999'999, max_exponent}; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto lowest       () -> boost::decimal::decimal32_fast { return {-9'999'999, max_exponent}; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto epsilon      () -> boost::decimal::decimal32_fast { return {1, -7}; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto round_error  () -> boost::decimal::decimal32_fast { return epsilon(); }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto infinity     () -> boost::decimal::decimal32_fast { return boost::decimal::direct_init(boost::decimal::detail::d32_fast_inf, UINT8_C((0))); }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto quiet_NaN    () -> boost::decimal::decimal32_fast { return boost::decimal::direct_init(boost::decimal::detail::d32_fast_qnan, UINT8_C((0))); }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto signaling_NaN() -> boost::decimal::decimal32_fast { return boost::decimal::direct_init(boost::decimal::detail::d32_fast_snan, UINT8_C((0))); }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto (min)        () -> boost::decimal::decimal32_fast { return {1, min_exponent}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto (max)        () -> boost::decimal::decimal32_fast { return {9'999'999, max_exponent}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto lowest       () -> boost::decimal::decimal32_fast { return {-9'999'999, max_exponent}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto epsilon      () -> boost::decimal::decimal32_fast { return {1, -7}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto round_error  () -> boost::decimal::decimal32_fast { return epsilon(); }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto infinity     () -> boost::decimal::decimal32_fast { return boost::decimal::direct_init(boost::decimal::detail::d32_fast_inf, UINT8_C((0))); }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto quiet_NaN    () -> boost::decimal::decimal32_fast { return boost::decimal::direct_init(boost::decimal::detail::d32_fast_qnan, UINT8_C((0))); }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto signaling_NaN() -> boost::decimal::decimal32_fast { return boost::decimal::direct_init(boost::decimal::detail::d32_fast_snan, UINT8_C((0))); }
 
     // With denorm absent returns the same value as min
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto denorm_min   () -> boost::decimal::decimal32_fast { return {1, min_exponent}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto denorm_min   () -> boost::decimal::decimal32_fast { return {1, min_exponent}; }
 };
 
 } // Namespace std
diff --git a/include/boost/decimal/detail/add_impl.hpp b/include/boost/decimal/detail/add_impl.hpp
index 771c57be6..c1501dc0d 100644
--- a/include/boost/decimal/detail/add_impl.hpp
+++ b/include/boost/decimal/detail/add_impl.hpp
@@ -5,9 +5,12 @@
 #ifndef BOOST_DECIMAL_DETAIL_ADD_IMPL_HPP
 #define BOOST_DECIMAL_DETAIL_ADD_IMPL_HPP
 
+#include <boost/decimal/detail/config.hpp>
 #include <boost/decimal/detail/attributes.hpp>
 #include <boost/decimal/detail/apply_sign.hpp>
 #include <boost/decimal/detail/fenv_rounding.hpp>
+#include <boost/decimal/detail/type_traits.hpp>
+#include <boost/decimal/detail/numeric_limits.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
 #include <cstdint>
@@ -18,9 +21,10 @@ namespace decimal {
 namespace detail {
 
 template <typename ReturnType, typename T, typename U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto d32_add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                       T rhs_sig, U rhs_exp, bool rhs_sign,
-                                                       bool abs_lhs_bigger) noexcept -> ReturnType
+BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_FORCE_INLINE
+constexpr auto d32_add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                            T rhs_sig, U rhs_exp, bool rhs_sign,
+                            bool abs_lhs_bigger) noexcept -> ReturnType
 {
     using add_type = std::int_fast32_t;
 
@@ -64,7 +68,7 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto d32_add_impl(T lhs_sig, U lhs_exp, boo
 
     if (delta_exp <= 2)
     {
-        sig_bigger *= pow10(static_cast<std::remove_reference_t<decltype(sig_bigger)>>(delta_exp));
+        sig_bigger *= pow10(static_cast<BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::remove_reference_t<decltype(sig_bigger)>>(delta_exp));
         exp_bigger -= delta_exp;
         delta_exp = 0;
     }
@@ -76,7 +80,7 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto d32_add_impl(T lhs_sig, U lhs_exp, boo
 
         if (delta_exp > 1)
         {
-            sig_smaller /= pow10(static_cast<std::remove_reference_t<decltype(sig_smaller)>>(delta_exp - 1));
+            sig_smaller /= pow10(static_cast<BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::remove_reference_t<decltype(sig_smaller)>>(delta_exp - 1));
             delta_exp = 1;
         }
     }
@@ -103,8 +107,9 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto d32_add_impl(T lhs_sig, U lhs_exp, boo
 }
 
 template <typename ReturnType, typename T, typename U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                   T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> ReturnType
+BOOST_DECIMAL_FORCE_INLINE BOOST_DECIMAL_GPU_ENABLED
+constexpr auto add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                        T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> ReturnType
 {
     const bool sign {lhs_sign};
 
@@ -176,9 +181,9 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto add_impl(T lhs_sig, U lhs_exp, bool lh
 }
 
 template <typename ReturnType, typename T, typename U>
-constexpr auto d64_add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                            T rhs_sig, U rhs_exp, bool rhs_sign,
-                            bool abs_lhs_bigger) noexcept -> ReturnType
+BOOST_DECIMAL_GPU_ENABLED constexpr auto d64_add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                                                      T rhs_sig, U rhs_exp, bool rhs_sign,
+                                                      bool abs_lhs_bigger) noexcept -> ReturnType
 {
     using add_type = std::int_fast64_t;
 
@@ -261,8 +266,8 @@ constexpr auto d64_add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
 
 template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T1, BOOST_DECIMAL_INTEGRAL U1,
                                BOOST_DECIMAL_INTEGRAL T2, BOOST_DECIMAL_INTEGRAL U2>
-constexpr auto d128_add_impl(T1 lhs_sig, U1 lhs_exp, bool lhs_sign,
-                             T2 rhs_sig, U2 rhs_exp, bool rhs_sign) noexcept -> ReturnType
+BOOST_DECIMAL_GPU_ENABLED constexpr auto d128_add_impl(T1 lhs_sig, U1 lhs_exp, bool lhs_sign,
+                                                       T2 rhs_sig, U2 rhs_exp, bool rhs_sign) noexcept -> ReturnType
 {
     const bool sign {lhs_sign};
 
@@ -284,7 +289,7 @@ constexpr auto d128_add_impl(T1 lhs_sig, U1 lhs_exp, bool lhs_sign,
         //
         // e.g. 1.234567e5 + 9.876543e-2 = 1.234568e5
 
-        BOOST_DECIMAL_IF_CONSTEXPR (std::numeric_limits<T2>::digits10 > std::numeric_limits<std::uint64_t>::digits10)
+        BOOST_DECIMAL_IF_CONSTEXPR (boost::decimal::detail::numeric_limits<T2>::digits10 > boost::decimal::detail::numeric_limits<std::uint64_t>::digits10)
         {
             if (rhs_sig >= detail::uint128 {UINT64_C(0xF684DF56C3E0), UINT64_C(0x1BC6C73200000000)})
             {
@@ -341,9 +346,9 @@ constexpr auto d128_add_impl(T1 lhs_sig, U1 lhs_exp, bool lhs_sign,
 }
 
 template <typename ReturnType, typename T, typename U>
-constexpr auto d128_add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                             T rhs_sig, U rhs_exp, bool rhs_sign,
-                             bool abs_lhs_bigger) noexcept -> ReturnType
+BOOST_DECIMAL_GPU_ENABLED constexpr auto d128_add_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                                                       T rhs_sig, U rhs_exp, bool rhs_sign,
+                                                       bool abs_lhs_bigger) noexcept -> ReturnType
 {
     auto delta_exp {lhs_exp > rhs_exp ? lhs_exp - rhs_exp : rhs_exp - lhs_exp};
 
diff --git a/include/boost/decimal/detail/apply_sign.hpp b/include/boost/decimal/detail/apply_sign.hpp
index 6fcf8b077..7ae0124dc 100644
--- a/include/boost/decimal/detail/apply_sign.hpp
+++ b/include/boost/decimal/detail/apply_sign.hpp
@@ -22,39 +22,39 @@ namespace boost { namespace decimal { namespace detail {
 
 template <typename Integer, typename Unsigned_Integer = detail::make_unsigned_t<Integer>,
           std::enable_if_t<detail::is_signed_v<Integer>, bool> = true>
-constexpr auto apply_sign(Integer val) noexcept -> Unsigned_Integer
+BOOST_DECIMAL_GPU_ENABLED constexpr auto apply_sign(Integer val) noexcept -> Unsigned_Integer
 {
     return static_cast<Unsigned_Integer>(-(static_cast<Unsigned_Integer>(val)));
 }
 
 template <typename Unsigned_Integer, std::enable_if_t<!detail::is_signed_v<Unsigned_Integer>, bool> = true>
-constexpr auto apply_sign(Unsigned_Integer val) noexcept -> Unsigned_Integer
+BOOST_DECIMAL_GPU_ENABLED constexpr auto apply_sign(Unsigned_Integer val) noexcept -> Unsigned_Integer
 {
     return val;
 }
 
 template <typename Integer, typename Unsigned_Integer = detail::make_unsigned_t<Integer>,
           std::enable_if_t<detail::is_signed_v<Integer>, bool> = true>
-constexpr auto make_positive_unsigned(Integer val) noexcept -> Unsigned_Integer
+BOOST_DECIMAL_GPU_ENABLED constexpr auto make_positive_unsigned(Integer val) noexcept -> Unsigned_Integer
 {
     return static_cast<Unsigned_Integer>(val < static_cast<Integer>(static_cast<std::int8_t>(0)) ? apply_sign(val) : static_cast<Unsigned_Integer>(val));
 }
 
 template <typename Unsigned_Integer, std::enable_if_t<!detail::is_signed_v<Unsigned_Integer>, bool> = true>
-constexpr auto make_positive_unsigned(Unsigned_Integer val) noexcept -> Unsigned_Integer
+BOOST_DECIMAL_GPU_ENABLED constexpr auto make_positive_unsigned(Unsigned_Integer val) noexcept -> Unsigned_Integer
 {
     return val;
 }
 
 template <typename Integer, std::enable_if_t<detail::is_signed_v<Integer>, bool> = true>
-constexpr auto make_signed_value(Integer val, bool sign) noexcept -> Integer
+BOOST_DECIMAL_GPU_ENABLED constexpr auto make_signed_value(Integer val, bool sign) noexcept -> Integer
 {
     return sign ? -val : val;
 }
 
 template <typename Unsigned_Integer, typename Integer = detail::make_signed_t<Unsigned_Integer>,
           std::enable_if_t<!detail::is_signed_v<Unsigned_Integer>, bool> = true>
-constexpr auto make_signed_value(Unsigned_Integer val, bool sign) noexcept -> Integer
+BOOST_DECIMAL_GPU_ENABLED constexpr auto make_signed_value(Unsigned_Integer val, bool sign) noexcept -> Integer
 {
     const auto signed_val {static_cast<Integer>(val)};
     return sign ? -signed_val : signed_val;
diff --git a/include/boost/decimal/detail/cmath/abs.hpp b/include/boost/decimal/detail/cmath/abs.hpp
index a1b5523a9..44da32421 100644
--- a/include/boost/decimal/detail/cmath/abs.hpp
+++ b/include/boost/decimal/detail/cmath/abs.hpp
@@ -19,7 +19,7 @@ namespace boost {
 namespace decimal {
 
 BOOST_DECIMAL_EXPORT template <typename T>
-constexpr auto abs BOOST_DECIMAL_PREVENT_MACRO_SUBSTITUTION (T rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto abs BOOST_DECIMAL_PREVENT_MACRO_SUBSTITUTION (T rhs) noexcept
     BOOST_DECIMAL_REQUIRES(detail::is_decimal_floating_point_v, T)
 {
     return signbit(rhs) ? -rhs : rhs;
diff --git a/include/boost/decimal/detail/cmath/fpclassify.hpp b/include/boost/decimal/detail/cmath/fpclassify.hpp
index d75f09702..d026a0961 100644
--- a/include/boost/decimal/detail/cmath/fpclassify.hpp
+++ b/include/boost/decimal/detail/cmath/fpclassify.hpp
@@ -20,7 +20,7 @@ namespace boost {
 namespace decimal {
 
 BOOST_DECIMAL_EXPORT template <typename T>
-constexpr auto fpclassify BOOST_DECIMAL_PREVENT_MACRO_SUBSTITUTION (T rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr auto fpclassify BOOST_DECIMAL_PREVENT_MACRO_SUBSTITUTION (T rhs) noexcept
     BOOST_DECIMAL_REQUIRES_RETURN(detail::is_decimal_floating_point_v, T, int)
 {
     constexpr T zero {0, 0};
diff --git a/include/boost/decimal/detail/concepts.hpp b/include/boost/decimal/detail/concepts.hpp
index 5961969d4..7c7c7cb7c 100644
--- a/include/boost/decimal/detail/concepts.hpp
+++ b/include/boost/decimal/detail/concepts.hpp
@@ -5,6 +5,7 @@
 #ifndef BOOST_DECIMAL_DETAIL_CONCEPTS
 #define BOOST_DECIMAL_DETAIL_CONCEPTS
 
+#include <boost/decimal/detail/config.hpp>
 #include <boost/decimal/detail/promotion.hpp>
 #include <boost/decimal/detail/type_traits.hpp>
 
@@ -413,23 +414,23 @@ concept execution_policy = std::is_execution_policy_v<std::remove_cvref_t<T>>;
 #endif
 
 #ifndef BOOST_DECIMAL_REQUIRES
-#  define BOOST_DECIMAL_REQUIRES(X, T) -> std::enable_if_t<X<T>, T>
+#  define BOOST_DECIMAL_REQUIRES(X, T) -> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<X<T>, T>
 #endif
 
 #ifndef BOOST_DECIMAL_REQUIRES_TWO
-#  define BOOST_DECIMAL_REQUIRES_TWO(X1, T1, X2, T2) -> std::enable_if_t<X1<T1> && X2<T2>, detail::promote_args_t<T1, T2>>
+#  define BOOST_DECIMAL_REQUIRES_TWO(X1, T1, X2, T2) -> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<X1<T1> && X2<T2>, detail::promote_args_t<T1, T2>>
 #endif
 
 #ifndef BOOST_DECIMAL_REQUIRES_TWO_RETURN
-#  define BOOST_DECIMAL_REQUIRES_TWO_RETURN(X1, T1, X2, T2, ReturnType) -> std::enable_if_t<X1<T1> && X2<T2>, ReturnType>
+#  define BOOST_DECIMAL_REQUIRES_TWO_RETURN(X1, T1, X2, T2, ReturnType) -> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<X1<T1> && X2<T2>, ReturnType>
 #endif
 
 #ifndef BOOST_DECIMAL_REQUIRES_THREE
-#  define BOOST_DECIMAL_REQUIRES_THREE(X1, T1, X2, T2, X3, T3) -> std::enable_if_t<X1<T1> && X2<T2> && X3<T3>, detail::promote_args_t<T1, T2, T3>>
+#  define BOOST_DECIMAL_REQUIRES_THREE(X1, T1, X2, T2, X3, T3) -> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<X1<T1> && X2<T2> && X3<T3>, detail::promote_args_t<T1, T2, T3>>
 #endif
 
 #ifndef BOOST_DECIMAL_REQUIRES_RETURN
-#  define BOOST_DECIMAL_REQUIRES_RETURN(X, T, ReturnType) -> std::enable_if_t<X<T>, ReturnType>
+#  define BOOST_DECIMAL_REQUIRES_RETURN(X, T, ReturnType) -> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<X<T>, ReturnType>
 #endif
 
 #endif //BOOST_DECIMAL_DETAIL_CONCEPTS
diff --git a/include/boost/decimal/detail/config.hpp b/include/boost/decimal/detail/config.hpp
index 7f6d26841..9224a77df 100644
--- a/include/boost/decimal/detail/config.hpp
+++ b/include/boost/decimal/detail/config.hpp
@@ -45,36 +45,6 @@
 #  define BOOST_DECIMAL_CXX20_CONSTEXPR inline
 #endif
 
-// Include intrinsics if available
-// This section allows us to disable any of the following independently.
-//   Use #define BOOST_DECIMAL_DISABLE_CASSERT to disable uses of assert.
-//   Use #define BOOST_DECIMAL_DISABLE_IOSTREAM to disable uses of I/O streaming.
-//   Use #define BOOST_DECIMAL_DISABLE_CLIB to disable uses of both assert as well as I/O streaming (and all oother heavyweight C-LIB artifacts).
-
-#if (!defined(BOOST_DECIMAL_DISABLE_CASSERT) && !defined(BOOST_DECIMAL_DISABLE_CLIB))
-#  ifndef BOOST_DECIMAL_BUILD_MODULE
-#    include <cassert>
-#  endif
-#endif
-
-#ifndef BOOST_DECIMAL_DISABLE_CASSERT
-#  define BOOST_DECIMAL_ASSERT(x) assert(x)
-#  define BOOST_DECIMAL_ASSERT_MSG(expr, msg) assert((expr)&&(msg))
-#else
-#  define BOOST_DECIMAL_ASSERT(x)
-#  define BOOST_DECIMAL_ASSERT_MSG(expr, msg)
-#endif
-
-#ifdef BOOST_DECIMAL_DISABLE_CLIB
-#  ifndef BOOST_DECIMAL_DISABLE_IOSTREAM
-#    define BOOST_DECIMAL_DISABLE_IOSTREAM
-#  endif
-#  ifndef BOOST_DECIMAL_DISABLE_CASSERT
-#    undef BOOST_DECIMAL_ASSERT
-#    define BOOST_DECIMAL_ASSERT(x)
-#  endif
-#endif
-
 // Include intrinsics if available
 #if defined(_MSC_VER)
 #  ifndef BOOST_DECIMAL_BUILD_MODULE
@@ -306,4 +276,154 @@ typedef unsigned __int128 uint128_t;
 #  define BOOST_DECIMAL_FAST_MATH
 #endif
 
+// GPU Options
+
+//
+// CUDA support:
+//
+
+#ifdef __CUDACC__
+#  define BOOST_DECIMAL_CUDA_ENABLED __host__ __device__
+#  define BOOST_DECIMAL_HAS_GPU_SUPPORT
+
+#  ifndef BOOST_DECIMAL_ENABLE_CUDA
+#    define BOOST_DECIMAL_ENABLE_CUDA
+#  endif
+
+// Device code can not handle exceptions
+#  ifndef BOOST_DECIMAL_NO_EXCEPTIONS
+#    define BOOST_DECIMAL_NO_EXCEPTIONS
+#  endif
+
+// Use CUDA's __forceinline__ instead of the host compiler's attribute (the library-wide macro is BOOST_DECIMAL_FORCE_INLINE)
+#  undef BOOST_DECIMAL_FORCE_INLINE
+#  define BOOST_DECIMAL_FORCE_INLINE __forceinline__
+
+#  define BOOST_DECIMAL_NO_LONG_DOUBLE_MATH_FUNCTIONS
+#  define BOOST_DECIMAL_DISABLE_CASSERT
+
+#elif defined(SYCL_LANGUAGE_VERSION)
+
+#  define BOOST_DECIMAL_SYCL_ENABLED SYCL_EXTERNAL
+#  define BOOST_DECIMAL_HAS_GPU_SUPPORT
+
+#  ifndef BOOST_DECIMAL_ENABLE_SYCL
+#    define BOOST_DECIMAL_ENABLE_SYCL
+#  endif
+
+#  ifndef BOOST_DECIMAL_NO_EXCEPTIONS
+#    define BOOST_DECIMAL_NO_EXCEPTIONS
+#  endif
+
+// spir64 does not support long double
+#  define BOOST_DECIMAL_NO_LONG_DOUBLE_MATH_FUNCTIONS
+
+#  undef BOOST_DECIMAL_FORCE_INLINE // SYCL: replace the host force-inline attribute with plain inline
+#  define BOOST_DECIMAL_FORCE_INLINE inline
+
+// __int128 does not compile under SYCL
+#  undef BOOST_DECIMAL_HAS_INT128
+
+#endif
+
+#ifndef BOOST_DECIMAL_CUDA_ENABLED
+#  define BOOST_DECIMAL_CUDA_ENABLED
+#endif
+
+#ifndef BOOST_DECIMAL_SYCL_ENABLED
+#  define BOOST_DECIMAL_SYCL_ENABLED
+#endif
+
+// Not all functions that allow CUDA allow SYCL (e.g. Recursion is disallowed by SYCL)
+#  define BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_CUDA_ENABLED BOOST_DECIMAL_SYCL_ENABLED
+
+// GPU builds use host/device-annotated replacements for std::swap, std::min and std::max; all other builds use the std:: versions
+#ifdef BOOST_DECIMAL_HAS_GPU_SUPPORT
+namespace boost {
+namespace decimal {
+namespace detail {
+
+template <class T>
+BOOST_DECIMAL_GPU_ENABLED constexpr void gpu_safe_swap(T& a, T& b) { T t(a); a = b; b = t; } // copy-based exchange of a and b
+template <class T>
+BOOST_DECIMAL_GPU_ENABLED constexpr T gpu_safe_min(const T& a, const T& b) { return a < b ? a : b; } // returns by value
+template <class T>
+BOOST_DECIMAL_GPU_ENABLED constexpr T gpu_safe_max(const T& a, const T& b) { return a > b ? a : b; } // name must match BOOST_DECIMAL_GPU_SAFE_MAX
+
+} // namespace detail
+} // namespace decimal
+} // namespace boost
+
+#define BOOST_DECIMAL_GPU_SAFE_SWAP(a, b) boost::decimal::detail::gpu_safe_swap(a, b)
+#define BOOST_DECIMAL_GPU_SAFE_MIN(a, b) boost::decimal::detail::gpu_safe_min(a, b)
+#define BOOST_DECIMAL_GPU_SAFE_MAX(a, b) boost::decimal::detail::gpu_safe_max(a, b)
+
+#else
+
+#define BOOST_DECIMAL_GPU_SAFE_SWAP(a, b) std::swap(a, b)
+#define BOOST_DECIMAL_GPU_SAFE_MIN(a, b) (std::min)(a, b)
+#define BOOST_DECIMAL_GPU_SAFE_MAX(a, b) (std::max)(a, b)
+
+#endif
+
+// Static variables are not allowed with CUDA or C++20 modules
+// See if we can inline them instead
+
+#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
+#  define BOOST_DECIMAL_STATIC_CONSTEXPR inline constexpr
+#  define BOOST_DECIMAL_STATIC static
+#  ifndef BOOST_DECIMAL_HAS_GPU_SUPPORT
+#    define BOOST_DECIMAL_STATIC_LOCAL_VARIABLE static
+#  else
+#    define BOOST_DECIMAL_STATIC_LOCAL_VARIABLE
+#  endif
+#else
+#  ifndef BOOST_DECIMAL_HAS_GPU_SUPPORT
+#    define BOOST_DECIMAL_STATIC_CONSTEXPR static constexpr
+#    define BOOST_DECIMAL_STATIC static
+#    define BOOST_DECIMAL_STATIC_LOCAL_VARIABLE
+#  else
+#    define BOOST_DECIMAL_STATIC_CONSTEXPR constexpr
+#    define BOOST_DECIMAL_STATIC constexpr
+#    define BOOST_DECIMAL_STATIC_LOCAL_VARIABLE static
+#  endif
+#endif
+
+#ifdef BOOST_DECIMAL_ENABLE_CUDA
+#  include <cuda/std/type_traits>
+#  define BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE cuda::std
+#else
+#  include <type_traits>
+#  define BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE std
+#endif 
+
+// This section allows us to disable any of the following independently.
+//   Use #define BOOST_DECIMAL_DISABLE_CASSERT to disable uses of assert.
+//   Use #define BOOST_DECIMAL_DISABLE_IOSTREAM to disable uses of I/O streaming.
+//   Use #define BOOST_DECIMAL_DISABLE_CLIB to disable uses of both assert as well as I/O streaming (and all other heavyweight C-LIB artifacts).
+
+#if (!defined(BOOST_DECIMAL_DISABLE_CASSERT) && !defined(BOOST_DECIMAL_DISABLE_CLIB))
+#  ifndef BOOST_DECIMAL_BUILD_MODULE
+#    include <cassert>
+#  endif
+#endif
+
+#ifndef BOOST_DECIMAL_DISABLE_CASSERT
+#  define BOOST_DECIMAL_ASSERT(x) assert(x)
+#  define BOOST_DECIMAL_ASSERT_MSG(expr, msg) assert((expr)&&(msg))
+#else
+#  define BOOST_DECIMAL_ASSERT(x)
+#  define BOOST_DECIMAL_ASSERT_MSG(expr, msg)
+#endif
+
+#ifdef BOOST_DECIMAL_DISABLE_CLIB
+#  ifndef BOOST_DECIMAL_DISABLE_IOSTREAM
+#    define BOOST_DECIMAL_DISABLE_IOSTREAM
+#  endif
+#  ifndef BOOST_DECIMAL_DISABLE_CASSERT
+#    undef BOOST_DECIMAL_ASSERT
+#    define BOOST_DECIMAL_ASSERT(x)
+#  endif
+#endif
+
 #endif // BOOST_DECIMAL_DETAIL_CONFIG_HPP
diff --git a/include/boost/decimal/detail/countl.hpp b/include/boost/decimal/detail/countl.hpp
index 9b4f0a0e2..9535f2d19 100644
--- a/include/boost/decimal/detail/countl.hpp
+++ b/include/boost/decimal/detail/countl.hpp
@@ -7,8 +7,9 @@
 #define BOOST_DECIMAL_DETAIL_COUNTL_HPP
 
 #include <boost/decimal/detail/config.hpp>
+#include <boost/decimal/detail/numeric_limits.hpp>
 
-#ifndef BOOST_DECIMAL_BUILD_MODULE
+#if !defined(BOOST_DECIMAL_BUILD_MODULE) && !defined(BOOST_DECIMAL_HAS_GPU_SUPPORT)
 #include <cstdint>
 #include <limits>
 #endif
@@ -16,9 +17,10 @@
 namespace boost {
 namespace decimal {
 namespace detail {
+
 namespace impl {
 
-#if BOOST_DECIMAL_HAS_BUILTIN(__builtin_clz)
+#if BOOST_DECIMAL_HAS_BUILTIN(__builtin_clz) && !defined(BOOST_DECIMAL_HAS_GPU_SUPPORT)
 
 constexpr int countl_impl(unsigned char x) noexcept
 {
@@ -51,20 +53,20 @@ constexpr int countl_impl(unsigned long long x) noexcept
 
 #else
 
-BOOST_DECIMAL_CONSTEXPR_VARIABLE int index64[64] = {
-    0, 47,  1, 56, 48, 27,  2, 60,
-    57, 49, 41, 37, 28, 16,  3, 61,
-    54, 58, 35, 52, 50, 42, 21, 44,
-    38, 32, 29, 23, 17, 11,  4, 62,
-    46, 55, 26, 59, 40, 36, 15, 53,
-    34, 51, 20, 43, 31, 22, 10, 45,
-    25, 39, 14, 33, 19, 30,  9, 24,
-    13, 18,  8, 12,  7,  6,  5, 63
-};
-
 // See: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
-constexpr auto bit_scan_reverse(std::uint64_t bb) noexcept -> int
+BOOST_DECIMAL_GPU_ENABLED constexpr auto bit_scan_reverse(std::uint64_t bb) noexcept -> int
 {
+    constexpr int index64[64] = {
+        0, 47,  1, 56, 48, 27,  2, 60,
+        57, 49, 41, 37, 28, 16,  3, 61,
+        54, 58, 35, 52, 50, 42, 21, 44,
+        38, 32, 29, 23, 17, 11,  4, 62,
+        46, 55, 26, 59, 40, 36, 15, 53,
+        34, 51, 20, 43, 31, 22, 10, 45,
+        25, 39, 14, 33, 19, 30,  9, 24,
+        13, 18,  8, 12,  7,  6,  5, 63
+    };
+
     constexpr auto debruijn64 {UINT64_C(0x03f79d71b4cb0a89)};
 
     BOOST_DECIMAL_ASSERT(bb != 0);
@@ -80,9 +82,9 @@ constexpr auto bit_scan_reverse(std::uint64_t bb) noexcept -> int
 }
 
 template <typename T>
-constexpr int countl_impl(T x) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr int countl_impl(T x) noexcept
 {
-    return x ? bit_scan_reverse(static_cast<std::uint64_t>(x)) ^ 63 : std::numeric_limits<T>::digits;
+    return x ? (boost::decimal::detail::numeric_limits<T>::digits - 1) - bit_scan_reverse(static_cast<std::uint64_t>(x)) : boost::decimal::detail::numeric_limits<T>::digits; // count relative to T's own width (same as ^ 63 for 64-bit T)
 }
 
 #endif
@@ -90,9 +92,9 @@ constexpr int countl_impl(T x) noexcept
 } //namespace impl
 
 template <typename T>
-constexpr int countl_zero(T x) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr int countl_zero(T x) noexcept
 {
-    static_assert(std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
+    static_assert(boost::decimal::detail::numeric_limits<T>::is_integer && !boost::decimal::detail::numeric_limits<T>::is_signed,
                   "Can only count with unsigned integers");
 
     return impl::countl_impl(x);
diff --git a/include/boost/decimal/detail/div_impl.hpp b/include/boost/decimal/detail/div_impl.hpp
index ef0a0f22b..8535deaa9 100644
--- a/include/boost/decimal/detail/div_impl.hpp
+++ b/include/boost/decimal/detail/div_impl.hpp
@@ -6,6 +6,7 @@
 #define BOOST_DECIMAL_DETAIL_DIV_IMPL_HPP
 
 #include <boost/decimal/detail/config.hpp>
+#include <boost/decimal/detail/numeric_limits.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
 #include <limits>
@@ -17,7 +18,7 @@ namespace decimal {
 namespace detail {
 
 template <typename DecimalType, typename T>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto generic_div_impl(const T& lhs, const T& rhs) noexcept -> DecimalType
+BOOST_DECIMAL_FORCE_INLINE BOOST_DECIMAL_GPU_ENABLED constexpr auto generic_div_impl(const T& lhs, const T& rhs) noexcept -> DecimalType
 {
     bool sign {lhs.sign != rhs.sign};
 
@@ -43,7 +44,7 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto generic_div_impl(const T& lhs, const T
 }
 
 template <typename DecimalType, typename T>
-constexpr auto d64_generic_div_impl(const T& lhs, const T& rhs) noexcept -> DecimalType
+BOOST_DECIMAL_GPU_ENABLED constexpr auto d64_generic_div_impl(const T& lhs, const T& rhs) noexcept -> DecimalType
 {
     #if defined(BOOST_DECIMAL_HAS_INT128) && (!defined(__clang_major__) || __clang_major__ > 13)
     using unsigned_int128_type = boost::decimal::detail::uint128_t;
@@ -71,7 +72,7 @@ constexpr auto d64_generic_div_impl(const T& lhs, const T& rhs) noexcept -> Deci
 }
 
 template <typename T>
-constexpr auto d128_generic_div_impl(T lhs, T rhs, T& q) noexcept -> void
+BOOST_DECIMAL_GPU_ENABLED constexpr auto d128_generic_div_impl(T lhs, T rhs, T& q) noexcept -> void
 {
     bool sign {lhs.sign != rhs.sign};
 
@@ -83,9 +84,9 @@ constexpr auto d128_generic_div_impl(T lhs, T rhs, T& q) noexcept -> void
 
     const auto sig_dig {detail::num_digits(res_sig)};
 
-    if (sig_dig > std::numeric_limits<detail::uint128>::digits10)
+    if (sig_dig > detail::numeric_limits<detail::uint128>::digits10)
     {
-        const auto digit_delta {sig_dig - std::numeric_limits<detail::uint128>::digits10};
+        const auto digit_delta {sig_dig - detail::numeric_limits<detail::uint128>::digits10};
         res_sig /= detail::uint256_t(pow10(detail::uint128(digit_delta)));
         res_exp += digit_delta;
     }
diff --git a/include/boost/decimal/detail/emulated128.hpp b/include/boost/decimal/detail/emulated128.hpp
index e99e25ec8..95d3efe65 100644
--- a/include/boost/decimal/detail/emulated128.hpp
+++ b/include/boost/decimal/detail/emulated128.hpp
@@ -40,6 +40,10 @@ namespace detail {
 #  pragma GCC diagnostic ignored "-Wreorder"
 #endif
 
+#ifdef BOOST_DECIMAL_ENABLE_CUDA
+#  pragma nv_diag_suppress 20012
+#endif
+
 // Compilers might support built-in 128-bit integer types. However, it seems that
 // emulating them with a pair of 64-bit integers actually produces a better code,
 // so we avoid using those built-ins. That said, they are still useful for
@@ -57,27 +61,27 @@ struct uint128
     #endif
 
     // Constructors
-    constexpr uint128() noexcept = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128() noexcept = default;
 
-    constexpr uint128(const uint128& v) noexcept = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128(const uint128& v) noexcept = default;
 
-    constexpr uint128(uint128&& v) noexcept = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128(uint128&& v) noexcept = default;
 
     #if BOOST_DECIMAL_ENDIAN_LITTLE_BYTE
-    constexpr uint128(std::uint64_t high_, std::uint64_t low_) noexcept : low {low_}, high {high_} {}
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128(std::uint64_t high_, std::uint64_t low_) noexcept : low {low_}, high {high_} {}
     #else
-    constexpr uint128(std::uint64_t high_, std::uint64_t low_) noexcept : high {high_}, low {low_} {}
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128(std::uint64_t high_, std::uint64_t low_) noexcept : high {high_}, low {low_} {}
     #endif
 
     #if BOOST_DECIMAL_ENDIAN_LITTLE_BYTE
 
-    #define SIGNED_CONSTRUCTOR(expr) constexpr uint128(expr v) noexcept : low {static_cast<std::uint64_t>(v)}, high {v < 0 ? UINT64_MAX : UINT64_C(0)} {}// NOLINT
-    #define UNSIGNED_CONSTRUCTOR(expr) constexpr uint128(expr v) noexcept : low {static_cast<std::uint64_t>(v)}, high {} {} // NOLINT
+    #define SIGNED_CONSTRUCTOR(expr) BOOST_DECIMAL_GPU_ENABLED constexpr uint128(expr v) noexcept : low {static_cast<std::uint64_t>(v)}, high {v < 0 ? UINT64_MAX : UINT64_C(0)} {}// NOLINT
+    #define UNSIGNED_CONSTRUCTOR(expr) BOOST_DECIMAL_GPU_ENABLED constexpr uint128(expr v) noexcept : low {static_cast<std::uint64_t>(v)}, high {} {} // NOLINT
 
     #else
 
-    #define SIGNED_CONSTRUCTOR(expr) constexpr uint128(expr v) noexcept : high {v < 0 ? UINT64_MAX : UINT64_C(0)}, low {static_cast<std::uint64_t>(v)} {}// NOLINT
-    #define UNSIGNED_CONSTRUCTOR(expr) constexpr uint128(expr v) noexcept : high {}, low {static_cast<std::uint64_t>(v)} {} // NOLINT
+    #define SIGNED_CONSTRUCTOR(expr) BOOST_DECIMAL_GPU_ENABLED constexpr uint128(expr v) noexcept : high {v < 0 ? UINT64_MAX : UINT64_C(0)}, low {static_cast<std::uint64_t>(v)} {}// NOLINT
+    #define UNSIGNED_CONSTRUCTOR(expr) BOOST_DECIMAL_GPU_ENABLED constexpr uint128(expr v) noexcept : high {}, low {static_cast<std::uint64_t>(v)} {} // NOLINT
 
     #endif
 
@@ -97,21 +101,21 @@ struct uint128
     #ifdef BOOST_DECIMAL_HAS_INT128
     #  if BOOST_DECIMAL_ENDIAN_LITTLE_BYTE
 
-    constexpr uint128(boost::decimal::detail::int128_t v) noexcept :  // NOLINT : Allow implicit conversions,
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128(boost::decimal::detail::int128_t v) noexcept :  // NOLINT : Allow implicit conversions,
          low {static_cast<std::uint64_t>(static_cast<boost::decimal::detail::int128_t>(v) & ~UINT64_C(0))},
          high {static_cast<std::uint64_t>(v >> 64)} {}
 
-    constexpr uint128(boost::decimal::detail::uint128_t v) noexcept : // NOLINT : Allow implicit conversions
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128(boost::decimal::detail::uint128_t v) noexcept : // NOLINT : Allow implicit conversions
         low {static_cast<std::uint64_t>(v & ~UINT64_C(0))},
         high {static_cast<std::uint64_t>(v >> 64)} {}
 
     #  else
 
-    constexpr uint128(boost::decimal::detail::int128_t v) noexcept :  // NOLINT : Allow implicit conversions,
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128(boost::decimal::detail::int128_t v) noexcept :  // NOLINT : Allow implicit conversions,
          high {static_cast<std::uint64_t>(v >> 64)},
          low {static_cast<std::uint64_t>(static_cast<boost::decimal::detail::uint128_t>(v) & ~UINT64_C(0))} {}
 
-    constexpr uint128(boost::decimal::detail::uint128_t v) noexcept : // NOLINT : Allow implicit conversions
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128(boost::decimal::detail::uint128_t v) noexcept : // NOLINT : Allow implicit conversions
         high {static_cast<std::uint64_t>(v >> 64)},
         low {static_cast<std::uint64_t>(v & ~UINT64_C(0))}{}
         
@@ -122,8 +126,8 @@ struct uint128
     #undef UNSIGNED_CONSTRUCTOR
 
     // Assignment Operators
-    #define   SIGNED_ASSIGNMENT_OPERATOR(expr) constexpr auto operator=(const expr& v) noexcept -> uint128& { high = v < 0 ? UINT64_MAX : UINT64_C(0); low = static_cast<std::uint64_t>(v); return *this; } // NOLINT
-    #define UNSIGNED_ASSIGNMENT_OPERATOR(expr) constexpr auto operator=(const expr& v) noexcept -> uint128& { high = 0U; low = static_cast<std::uint64_t>(v); return *this; } // NOLINT
+    #define   SIGNED_ASSIGNMENT_OPERATOR(expr) BOOST_DECIMAL_GPU_ENABLED constexpr auto operator=(const expr& v) noexcept -> uint128& { high = v < 0 ? UINT64_MAX : UINT64_C(0); low = static_cast<std::uint64_t>(v); return *this; } // NOLINT
+    #define UNSIGNED_ASSIGNMENT_OPERATOR(expr) BOOST_DECIMAL_GPU_ENABLED constexpr auto operator=(const expr& v) noexcept -> uint128& { high = 0U; low = static_cast<std::uint64_t>(v); return *this; } // NOLINT
 
     SIGNED_ASSIGNMENT_OPERATOR(char)                    // NOLINT
     SIGNED_ASSIGNMENT_OPERATOR(signed char)             // NOLINT
@@ -139,18 +143,18 @@ struct uint128
     UNSIGNED_ASSIGNMENT_OPERATOR(unsigned long long)    // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr auto operator=(const boost::decimal::detail::int128_t&  v) noexcept -> uint128& { *this = uint128(v); return *this; }
-    constexpr auto operator=(const boost::decimal::detail::uint128_t& v) noexcept -> uint128& { *this = uint128(v); return *this; }
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator=(const boost::decimal::detail::int128_t&  v) noexcept -> uint128& { *this = uint128(v); return *this; }
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator=(const boost::decimal::detail::uint128_t& v) noexcept -> uint128& { *this = uint128(v); return *this; }
     #endif
 
-    constexpr uint128& operator=(const uint128&) noexcept = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint128& operator=(const uint128&) noexcept = default;
 
     #undef SIGNED_ASSIGNMENT_OPERATOR
     #undef UNSIGNED_ASSIGNMENT_OPERATOR
 
     // Conversion Operators
-    #define INTEGER_CONVERSION_OPERATOR(expr) explicit constexpr operator expr() const noexcept { return static_cast<expr>(low); }
-    #define   FLOAT_CONVERSION_OPERATOR(expr) explicit           operator expr() const noexcept { return std::ldexp(static_cast<expr>(high), 64) + static_cast<expr>(low); }
+    #define INTEGER_CONVERSION_OPERATOR(expr) BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator expr() const noexcept { return static_cast<expr>(low); }
+    #define   FLOAT_CONVERSION_OPERATOR(expr) BOOST_DECIMAL_GPU_ENABLED explicit           operator expr() const noexcept { using std::ldexp; return ldexp(static_cast<expr>(high), 64) + static_cast<expr>(low); }
 
     INTEGER_CONVERSION_OPERATOR(char)                   // NOLINT
     INTEGER_CONVERSION_OPERATOR(signed char)            // NOLINT
@@ -164,11 +168,11 @@ struct uint128
     INTEGER_CONVERSION_OPERATOR(unsigned long)          // NOLINT
     INTEGER_CONVERSION_OPERATOR(unsigned long long)     // NOLINT
 
-    explicit constexpr operator bool() const noexcept { return high || low; }
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator bool() const noexcept { return high || low; }
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    explicit constexpr operator int128_t() noexcept { return (static_cast<int128_t>(high) << 64) + low; }
-    explicit constexpr operator uint128_t() const noexcept { return (static_cast<uint128_t>(high) << 64) + low; }
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator int128_t() const noexcept { return (static_cast<int128_t>(high) << 64) + low; } // const, matching the uint128_t conversion below
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator uint128_t() const noexcept { return (static_cast<uint128_t>(high) << 64) + low; }
     #endif
 
     #ifdef BOOST_DECIMAL_HAS_FLOAT128
@@ -178,20 +182,23 @@ struct uint128
 
     FLOAT_CONVERSION_OPERATOR(float)        // NOLINT
     FLOAT_CONVERSION_OPERATOR(double)       // NOLINT
+
+    #ifndef BOOST_DECIMAL_NO_LONG_DOUBLE_MATH_FUNCTIONS
     FLOAT_CONVERSION_OPERATOR(long double)  // NOLINT
+    #endif
 
     #undef INTEGER_CONVERSION_OPERATOR
     #undef FLOAT_CONVERSION_OPERATOR
 
     // Unary Operators
-    constexpr friend auto operator-(uint128 val) noexcept -> uint128;
-    constexpr friend auto operator+(uint128 val) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator-(uint128 val) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator+(uint128 val) noexcept -> uint128;
 
     // Comparison Operators
 
     // Equality
-    #define          INTEGER_OPERATOR_EQUAL(expr) constexpr friend auto operator==(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0 && rhs >= 0 && lhs.low == static_cast<std::uint64_t>(rhs); } // NOLINT
-    #define UNSIGNED_INTEGER_OPERATOR_EQUAL(expr) constexpr friend auto operator==(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0 && lhs.low == static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define          INTEGER_OPERATOR_EQUAL(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator==(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0 && rhs >= 0 && lhs.low == static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define UNSIGNED_INTEGER_OPERATOR_EQUAL(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator==(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0 && lhs.low == static_cast<std::uint64_t>(rhs); } // NOLINT
 
     INTEGER_OPERATOR_EQUAL(char)                        // NOLINT
     INTEGER_OPERATOR_EQUAL(signed char)                 // NOLINT
@@ -206,17 +213,17 @@ struct uint128
     UNSIGNED_INTEGER_OPERATOR_EQUAL(unsigned long long) // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator==(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs == uint128(rhs); }
-    constexpr friend auto operator==(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs == uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator==(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs == uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator==(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs == uint128(rhs); }
     #endif
 
-    constexpr friend auto operator==(uint128 lhs, uint128 rhs) noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator==(uint128 lhs, uint128 rhs) noexcept -> bool;
 
     #undef INTEGER_OPERATOR_EQUAL
     #undef UNSIGNED_INTEGER_OPERATOR_EQUAL
 
     // Inequality
-    #define INTEGER_OPERATOR_NOTEQUAL(expr) constexpr friend auto operator!=(uint128 lhs, expr rhs) noexcept -> bool { return !(lhs == rhs); } // NOLINT
+    #define INTEGER_OPERATOR_NOTEQUAL(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator!=(uint128 lhs, expr rhs) noexcept -> bool { return !(lhs == rhs); } // NOLINT
 
     INTEGER_OPERATOR_NOTEQUAL(char)                 // NOLINT
     INTEGER_OPERATOR_NOTEQUAL(signed char)          // NOLINT
@@ -231,17 +238,17 @@ struct uint128
     INTEGER_OPERATOR_NOTEQUAL(unsigned long long)   // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator!=(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return !(lhs == rhs); }
-    constexpr friend auto operator!=(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return !(lhs == rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator!=(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return !(lhs == rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator!=(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return !(lhs == rhs); }
     #endif
 
-    constexpr friend auto operator!=(uint128 lhs, uint128 rhs) noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator!=(uint128 lhs, uint128 rhs) noexcept -> bool;
 
     #undef INTEGER_OPERATOR_NOTEQUAL
 
     // Less than
-    #define          INTEGER_OPERATOR_LESS_THAN(expr) constexpr friend auto operator<(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0U && rhs > 0 && lhs.low < static_cast<std::uint64_t>(rhs); } // NOLINT
-    #define UNSIGNED_INTEGER_OPERATOR_LESS_THAN(expr) constexpr friend auto operator<(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0U && lhs.low < static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define          INTEGER_OPERATOR_LESS_THAN(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0U && rhs > 0 && lhs.low < static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define UNSIGNED_INTEGER_OPERATOR_LESS_THAN(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0U && lhs.low < static_cast<std::uint64_t>(rhs); } // NOLINT
 
     INTEGER_OPERATOR_LESS_THAN(char)                            // NOLINT
     INTEGER_OPERATOR_LESS_THAN(signed char)                     // NOLINT
@@ -256,18 +263,18 @@ struct uint128
     UNSIGNED_INTEGER_OPERATOR_LESS_THAN(unsigned long long)     // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator<(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs < uint128(rhs); }
-    constexpr friend auto operator<(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs < uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs < uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs < uint128(rhs); }
     #endif
 
-    constexpr friend auto operator<(uint128 lhs, uint128 rhs) noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<(uint128 lhs, uint128 rhs) noexcept -> bool;
 
     #undef INTEGER_OPERATOR_LESS_THAN
     #undef UNSIGNED_INTEGER_OPERATOR_LESS_THAN
 
     // Less than or equal to
-    #define          INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO(expr) constexpr friend auto operator<=(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0U && rhs >= 0 && lhs.low <= static_cast<std::uint64_t>(rhs); } // NOLINT
-    #define UNSIGNED_INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO(expr) constexpr friend auto operator<=(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0U && lhs.low <= static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define          INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<=(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0U && rhs >= 0 && lhs.low <= static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define UNSIGNED_INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<=(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high == 0U && lhs.low <= static_cast<std::uint64_t>(rhs); } // NOLINT
 
     INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO(char)                            // NOLINT
     INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO(signed char)                     // NOLINT
@@ -282,18 +289,18 @@ struct uint128
     UNSIGNED_INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO(unsigned long long)     // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator<=(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs <= uint128(rhs); }
-    constexpr friend auto operator<=(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs <= uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<=(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs <= uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<=(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs <= uint128(rhs); }
     #endif
 
-    constexpr friend auto operator<=(uint128 lhs, uint128 rhs) noexcept -> bool ;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator<=(uint128 lhs, uint128 rhs) noexcept -> bool ;
 
     #undef INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO
     #undef UNSIGNED_INTEGER_OPERATOR_LESS_THAN_OR_EQUAL_TO
 
     // Greater than
-    #define          INTEGER_OPERATOR_GREATER_THAN(expr) constexpr friend auto operator>(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high > 0U || rhs < 0 || lhs.low > static_cast<std::uint64_t>(rhs); } // NOLINT
-    #define UNSIGNED_INTEGER_OPERATOR_GREATER_THAN(expr) constexpr friend auto operator>(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high > 0U || lhs.low > static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define          INTEGER_OPERATOR_GREATER_THAN(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high > 0U || rhs < 0 || lhs.low > static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define UNSIGNED_INTEGER_OPERATOR_GREATER_THAN(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>(uint128 lhs, expr rhs) noexcept -> bool { return lhs.high > 0U || lhs.low > static_cast<std::uint64_t>(rhs); } // NOLINT
 
     INTEGER_OPERATOR_GREATER_THAN(char)                             // NOLINT
     INTEGER_OPERATOR_GREATER_THAN(signed char)                      // NOLINT
@@ -308,18 +315,18 @@ struct uint128
     UNSIGNED_INTEGER_OPERATOR_GREATER_THAN(unsigned long long)      // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator>(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs > uint128(rhs); }
-    constexpr friend auto operator>(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs > uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs > uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs > uint128(rhs); }
     #endif
 
-    constexpr friend auto operator>(uint128 lhs, uint128 rhs) noexcept -> bool ;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>(uint128 lhs, uint128 rhs) noexcept -> bool ;
 
     #undef INTEGER_OPERATOR_GREATER_THAN
     #undef UNSIGNED_INTEGER_OPERATOR_GREATER_THAN
 
     // Greater than or equal to
-    #define          INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO(expr) constexpr friend auto operator>=(uint128 lhs, expr rhs) noexcept  -> bool { return lhs.high > 0U || rhs < 0 || lhs.low >= static_cast<std::uint64_t>(rhs); } // NOLINT
-    #define UNSIGNED_INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO(expr) constexpr friend auto operator>=(uint128 lhs, expr rhs) noexcept  -> bool { return lhs.high > 0U || lhs.low >= static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define          INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>=(uint128 lhs, expr rhs) noexcept  -> bool { return lhs.high > 0U || rhs < 0 || lhs.low >= static_cast<std::uint64_t>(rhs); } // NOLINT
+    #define UNSIGNED_INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>=(uint128 lhs, expr rhs) noexcept  -> bool { return lhs.high > 0U || lhs.low >= static_cast<std::uint64_t>(rhs); } // NOLINT
 
     INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO(char)                             // NOLINT
     INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO(signed char)                      // NOLINT
@@ -334,11 +341,11 @@ struct uint128
     UNSIGNED_INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO(unsigned long long)      // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator>=(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs >= uint128(rhs); }
-    constexpr friend auto operator>=(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs >= uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>=(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> bool { return lhs >= uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>=(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> bool { return lhs >= uint128(rhs); }
     #endif
 
-    constexpr friend auto operator>=(uint128 lhs, uint128 rhs) noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator>=(uint128 lhs, uint128 rhs) noexcept -> bool;
 
     #undef INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO
     #undef UNSIGNED_INTEGER_OPERATOR_GREATER_THAN_OR_EQUAL_TO
@@ -346,10 +353,10 @@ struct uint128
     // Binary Operators
 
     // Not
-    constexpr friend auto operator~(uint128 v) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator~(uint128 v) noexcept -> uint128;
 
     // Or
-    #define INTEGER_BINARY_OPERATOR_OR(expr) constexpr friend auto operator|(uint128 lhs, expr rhs) noexcept -> uint128 { return {lhs.high, lhs.low | static_cast<std::uint64_t>(rhs)}; } // NOLINT
+    #define INTEGER_BINARY_OPERATOR_OR(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator|(uint128 lhs, expr rhs) noexcept -> uint128 { return {lhs.high, lhs.low | static_cast<std::uint64_t>(rhs)}; } // NOLINT
 
     INTEGER_BINARY_OPERATOR_OR(char)                // NOLINT
     INTEGER_BINARY_OPERATOR_OR(signed char)         // NOLINT
@@ -364,18 +371,18 @@ struct uint128
     INTEGER_BINARY_OPERATOR_OR(unsigned long long)  // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator|(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> uint128 { return lhs | uint128(rhs); }
-    constexpr friend auto operator|(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> uint128 { return lhs | uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator|(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> uint128 { return lhs | uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator|(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> uint128 { return lhs | uint128(rhs); }
     #endif
 
-    constexpr friend auto operator|(uint128 lhs, uint128 rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator|(uint128 lhs, uint128 rhs) noexcept -> uint128;
 
-    constexpr auto operator|=(uint128 v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator|=(uint128 v) noexcept -> uint128&;
 
     #undef INTEGER_BINARY_OPERATOR_OR
 
     // And
-    #define INTEGER_BINARY_OPERATOR_AND(expr) constexpr friend auto operator&(uint128 lhs, expr rhs) noexcept -> uint128 { return {lhs.high, lhs.low & static_cast<std::uint64_t>(rhs)}; } // NOLINT
+    #define INTEGER_BINARY_OPERATOR_AND(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator&(uint128 lhs, expr rhs) noexcept -> uint128 { return {lhs.high, lhs.low & static_cast<std::uint64_t>(rhs)}; } // NOLINT
 
     INTEGER_BINARY_OPERATOR_AND(char)                   // NOLINT
     INTEGER_BINARY_OPERATOR_AND(signed char)            // NOLINT
@@ -390,18 +397,18 @@ struct uint128
     INTEGER_BINARY_OPERATOR_AND(unsigned long long)     // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator&(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> uint128 { return lhs & uint128(rhs); }
-    constexpr friend auto operator&(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> uint128 { return lhs & uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator&(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> uint128 { return lhs & uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator&(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> uint128 { return lhs & uint128(rhs); }
     #endif
 
-    constexpr friend auto operator&(uint128 lhs, uint128 rhs) noexcept-> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator&(uint128 lhs, uint128 rhs) noexcept-> uint128;
 
-    constexpr auto operator&=(uint128 v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator&=(uint128 v) noexcept -> uint128&;
 
     #undef INTEGER_BINARY_OPERATOR_AND
 
     // Xor
-    #define INTEGER_BINARY_OPERATOR_XOR(expr) constexpr friend auto operator^(uint128 lhs, expr rhs) noexcept -> uint128 { return {lhs.high, lhs.low ^ static_cast<std::uint64_t>(rhs)}; } // NOLINT
+    #define INTEGER_BINARY_OPERATOR_XOR(expr) BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator^(uint128 lhs, expr rhs) noexcept -> uint128 { return {lhs.high, lhs.low ^ static_cast<std::uint64_t>(rhs)}; } // NOLINT
 
     INTEGER_BINARY_OPERATOR_XOR(char)                   // NOLINT
     INTEGER_BINARY_OPERATOR_XOR(signed char)            // NOLINT
@@ -416,18 +423,19 @@ struct uint128
     INTEGER_BINARY_OPERATOR_XOR(unsigned long long)     // NOLINT
 
     #ifdef BOOST_DECIMAL_HAS_INT128
-    constexpr friend auto operator^(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> uint128 { return lhs ^ uint128(rhs); }
-    constexpr friend auto operator^(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> uint128 { return lhs ^ uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator^(uint128 lhs, boost::decimal::detail::int128_t  rhs) noexcept -> uint128 { return lhs ^ uint128(rhs); }
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator^(uint128 lhs, boost::decimal::detail::uint128_t rhs) noexcept -> uint128 { return lhs ^ uint128(rhs); }
     #endif
 
-    constexpr friend auto operator^(uint128 lhs, uint128 rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator^(uint128 lhs, uint128 rhs) noexcept -> uint128;
 
-    constexpr auto operator^=(uint128 v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator^=(uint128 v) noexcept -> uint128&;
 
     #undef INTEGER_BINARY_OPERATOR_XOR
 
     // Left shift
     #define INTEGER_BINARY_OPERATOR_LEFT_SHIFT(expr)                                            \
+    BOOST_DECIMAL_GPU_ENABLED                                                                   \
     constexpr friend auto operator<<(uint128 lhs, expr rhs) noexcept -> uint128                 \
     {                                                                                           \
         if (rhs >= 64)                                                                          \
@@ -455,6 +463,7 @@ struct uint128
     INTEGER_BINARY_OPERATOR_LEFT_SHIFT(unsigned long long)      // NOLINT
 
     #define INTEGER_BINARY_OPERATOR_EQUALS_LEFT_SHIFT(expr)                     \
+    BOOST_DECIMAL_GPU_ENABLED                                                   \
     constexpr auto operator<<=(expr amount) noexcept -> uint128&                \
     {                                                                           \
         *this = *this << amount;                                                \
@@ -478,6 +487,7 @@ struct uint128
 
     // Right Shift
     #define INTEGER_BINARY_OPERATOR_RIGHT_SHIFT(expr)                                               \
+    BOOST_DECIMAL_GPU_ENABLED                                                                       \
     constexpr friend auto operator>>(uint128 lhs, expr amount) noexcept -> uint128                  \
     {                                                                                               \
         if (amount >= 64)                                                                           \
@@ -505,6 +515,7 @@ struct uint128
     INTEGER_BINARY_OPERATOR_RIGHT_SHIFT(unsigned long long)     // NOLINT
 
     #define INTEGER_BINARY_OPERATOR_EQUALS_RIGHT_SHIFT(expr)                        \
+    BOOST_DECIMAL_GPU_ENABLED                                                       \
     constexpr auto operator>>=(expr amount) noexcept -> uint128&                    \
     {                                                                               \
         *this = *this >> amount;                                                    \
@@ -527,50 +538,50 @@ struct uint128
     #undef INTEGER_BINARY_OPERATOR_EQUALS_RIGHT_SHIFT
 
     // Arithmetic operators (Add, sub, mul, div, mod)
-    constexpr auto operator+=(std::uint64_t n) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+=(std::uint64_t n) noexcept -> uint128&;
 
-    constexpr friend auto operator+(uint128 lhs, uint128 rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator+(uint128 lhs, uint128 rhs) noexcept -> uint128;
 
-    constexpr auto operator+=(uint128 v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+=(uint128 v) noexcept -> uint128&;
 
-    constexpr auto operator++() noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator++() noexcept -> uint128&;
 
-    constexpr auto operator++(int) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator++(int) noexcept -> uint128;
 
-    constexpr friend auto operator-(uint128 lhs, uint128 rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator-(uint128 lhs, uint128 rhs) noexcept -> uint128;
 
-    constexpr auto operator-=(uint128 v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-=(uint128 v) noexcept -> uint128&;
 
-    constexpr auto operator--() noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator--() noexcept -> uint128&;
 
-    constexpr auto operator--(int) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator--(int) noexcept -> uint128;
 
-    constexpr friend auto operator*(uint128 lhs, uint128 rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator*(uint128 lhs, uint128 rhs) noexcept -> uint128;
 
-    constexpr friend auto operator*(uint128 lhs, std::uint64_t rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator*(uint128 lhs, std::uint64_t rhs) noexcept -> uint128;
 
-    constexpr auto operator*=(uint128 v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*=(uint128 v) noexcept -> uint128&;
 
-    constexpr auto operator*=(std::uint64_t v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*=(std::uint64_t v) noexcept -> uint128&;
 
-    constexpr friend auto operator/(uint128 lhs, uint128 rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator/(uint128 lhs, uint128 rhs) noexcept -> uint128;
 
-    constexpr friend auto operator/(uint128 lhs, std::uint64_t rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator/(uint128 lhs, std::uint64_t rhs) noexcept -> uint128;
 
-    constexpr auto operator/=(uint128 v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/=(uint128 v) noexcept -> uint128&;
 
-    constexpr friend auto operator%(uint128 lhs, uint128 rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator%(uint128 lhs, uint128 rhs) noexcept -> uint128;
 
-    constexpr friend auto operator%(uint128 lhs, std::uint64_t rhs) noexcept -> uint128;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto operator%(uint128 lhs, std::uint64_t rhs) noexcept -> uint128;
 
-    constexpr auto operator%=(uint128 v) noexcept -> uint128&;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator%=(uint128 v) noexcept -> uint128&;
 
     #if !defined(BOOST_DECIMAL_DISABLE_IOSTREAM)
     template <typename charT, typename traits>
-    friend auto operator<<(std::basic_ostream<charT, traits>& os, uint128 val) -> std::basic_ostream<charT, traits>&;
+    BOOST_DECIMAL_GPU_ENABLED friend auto operator<<(std::basic_ostream<charT, traits>& os, uint128 val) -> std::basic_ostream<charT, traits>&;
     #endif
 
-    constexpr void add_with_carry(const uint128& other, bool& carry)
+    BOOST_DECIMAL_GPU_ENABLED constexpr void add_with_carry(const uint128& other, bool& carry)
     {
         auto previous_low = static_cast<std::uint64_t>(low);
 
@@ -583,9 +594,9 @@ struct uint128
     }
 
 private:
-    constexpr friend auto high_bit(uint128 v) noexcept -> int;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto high_bit(uint128 v) noexcept -> int;
 
-    constexpr friend auto div_impl(uint128 lhs, uint128 rhs, uint128 &quotient, uint128 &remainder) noexcept -> void;
+    BOOST_DECIMAL_GPU_ENABLED constexpr friend auto div_impl(uint128 lhs, uint128 rhs, uint128 &quotient, uint128 &remainder) noexcept -> void;
 };
 
 struct int128
@@ -599,54 +610,54 @@ struct int128
     #endif
 
     // Constructors
-    constexpr int128() noexcept = default;
-    constexpr int128(const int128& v) noexcept = default;
-    constexpr int128(int128&& v) noexcept = default;
-    constexpr int128& operator=(const int128& v) = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr int128() noexcept = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr int128(const int128& v) noexcept = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr int128(int128&& v) noexcept = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr int128& operator=(const int128& v) = default;
 
     #if BOOST_DECIMAL_ENDIAN_LITTLE_BYTE
-    constexpr int128(std::int64_t high_, std::uint64_t low_) noexcept : low {low_}, high {high_} {}
-    constexpr int128(const uint128& v) noexcept : low {v.low}, high {static_cast<std::int64_t>(v.high)} {}
-    explicit constexpr int128(std::uint64_t v) noexcept : low {v}, high {} {}
-    explicit constexpr int128(std::uint32_t v) noexcept : low {v}, high {} {}
-    explicit constexpr int128(std::uint16_t v) noexcept : low {v}, high {} {}
-    explicit constexpr int128(std::uint8_t v) noexcept : low {v}, high {} {}
-    explicit constexpr int128(std::int64_t v) noexcept : low{static_cast<std::uint64_t>(v)}, high{v < 0 ? -1 : 0} {}
-    explicit constexpr int128(std::int32_t v) noexcept : low{static_cast<std::uint64_t>(v)}, high{v < 0 ? -1 : 0} {}
-    explicit constexpr int128(std::int16_t v) noexcept : low{static_cast<std::uint64_t>(v)}, high{v < 0 ? -1 : 0} {}
-    explicit constexpr int128(std::int8_t v) noexcept : low{static_cast<std::uint64_t>(v)}, high{v < 0 ? -1 : 0} {}
+    BOOST_DECIMAL_GPU_ENABLED constexpr int128(std::int64_t high_, std::uint64_t low_) noexcept : low {low_}, high {high_} {}
+    BOOST_DECIMAL_GPU_ENABLED constexpr int128(const uint128& v) noexcept : low {v.low}, high {static_cast<std::int64_t>(v.high)} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::uint64_t v) noexcept : low {v}, high {} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::uint32_t v) noexcept : low {v}, high {} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::uint16_t v) noexcept : low {v}, high {} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::uint8_t v) noexcept : low {v}, high {} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::int64_t v) noexcept : low{static_cast<std::uint64_t>(v)}, high{v < 0 ? -1 : 0} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::int32_t v) noexcept : low{static_cast<std::uint64_t>(v)}, high{v < 0 ? -1 : 0} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::int16_t v) noexcept : low{static_cast<std::uint64_t>(v)}, high{v < 0 ? -1 : 0} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::int8_t v) noexcept : low{static_cast<std::uint64_t>(v)}, high{v < 0 ? -1 : 0} {}
     #else
-    constexpr int128(std::int64_t high_, std::uint64_t low_) noexcept : high {high_}, low {low_} {}
-    constexpr int128(const uint128& v) noexcept : high {static_cast<std::int64_t>(v.high)}, low {v.low} {}
-    explicit constexpr int128(std::uint64_t v) noexcept : high {}, low {v} {}
-    explicit constexpr int128(std::uint32_t v) noexcept : high {}, low {v} {}
-    explicit constexpr int128(std::uint16_t v) noexcept : high {}, low {v} {}
-    explicit constexpr int128(std::uint8_t v) noexcept : high {}, low {v} {}
-    explicit constexpr int128(std::int64_t v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast<std::uint64_t>(v)} {}
-    explicit constexpr int128(std::int32_t v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast<std::uint64_t>(v)} {}
-    explicit constexpr int128(std::int16_t v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast<std::uint64_t>(v)} {}
-    explicit constexpr int128(std::int8_t v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast<std::uint64_t>(v)} {}
+    BOOST_DECIMAL_GPU_ENABLED constexpr int128(std::int64_t high_, std::uint64_t low_) noexcept : high {high_}, low {low_} {}
+    BOOST_DECIMAL_GPU_ENABLED constexpr int128(const uint128& v) noexcept : high {static_cast<std::int64_t>(v.high)}, low {v.low} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::uint64_t v) noexcept : high {}, low {v} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::uint32_t v) noexcept : high {}, low {v} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::uint16_t v) noexcept : high {}, low {v} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::uint8_t v) noexcept : high {}, low {v} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::int64_t v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast<std::uint64_t>(v)} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::int32_t v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast<std::uint64_t>(v)} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::int16_t v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast<std::uint64_t>(v)} {}
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr int128(std::int8_t v) noexcept : high{v < 0 ? -1 : 0}, low{static_cast<std::uint64_t>(v)} {}
     #endif
 
-    explicit constexpr operator uint128() const noexcept;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator uint128() const noexcept;
 
-    friend constexpr auto operator-(int128 rhs) noexcept -> int128;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr auto operator-(int128 rhs) noexcept -> int128;
 
-    constexpr auto operator<(int128 rhs) const noexcept -> bool;
-    constexpr auto operator>(int128 rhs) const noexcept -> bool;
-    constexpr auto operator<=(int128 rhs) const noexcept -> bool;
-    constexpr auto operator>=(int128 rhs) const noexcept -> bool;
-    constexpr auto operator==(int128 rhs) const noexcept -> bool;
-    constexpr auto operator!=(int128 rhs) const noexcept -> bool;
-    constexpr auto operator<(std::int64_t rhs) const noexcept -> bool;
-    constexpr auto operator==(std::int64_t rhs) const noexcept -> bool;
-    constexpr auto operator>(std::int64_t rhs) const noexcept -> bool;
-    constexpr auto operator<(int rhs) const noexcept -> bool;
-    constexpr auto operator==(int rhs) const noexcept -> bool;
-    constexpr auto operator>(int rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(int128 rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(int128 rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(int128 rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(int128 rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(int128 rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(int128 rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(std::int64_t rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(std::int64_t rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(std::int64_t rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(int rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(int rhs) const noexcept -> bool;
+    BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(int rhs) const noexcept -> bool;
 
-    friend constexpr auto operator+(const int128& lhs, const int128& rhs) noexcept -> int128;
-    friend constexpr auto operator-(const int128& lhs, const int128& rhs) noexcept -> int128;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr auto operator+(const int128& lhs, const int128& rhs) noexcept -> int128;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr auto operator-(const int128& lhs, const int128& rhs) noexcept -> int128;
 
     #if !defined(BOOST_DECIMAL_DISABLE_IOSTREAM)
     template <typename charT, typename traits>
@@ -654,31 +665,35 @@ struct int128
     #endif
 };
 
+#ifdef BOOST_DECIMAL_ENABLE_CUDA
+#  pragma nv_diag_default 20012
+#endif
+
 #if (defined(__GNUC__) && __GNUC__ >= 8) || (!defined(BOOST_DECIMAL_ENDIAN_LITTLE_BYTE) && defined(__GNUC__))
 #  pragma GCC diagnostic pop
 #endif
 
-constexpr auto operator-(uint128 val) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(uint128 val) noexcept -> uint128
 {
     return {~val.high + static_cast<std::uint64_t>(val.low == 0), ~val.low + 1};
 }
 
-constexpr auto operator+(uint128 val) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(uint128 val) noexcept -> uint128
 {
     return val;
 }
 
-constexpr auto operator==(uint128 lhs, uint128 rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(uint128 lhs, uint128 rhs) noexcept -> bool
 {
     return lhs.high == rhs.high && lhs.low == rhs.low;
 }
 
-constexpr auto operator!=(uint128 lhs, uint128 rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(uint128 lhs, uint128 rhs) noexcept -> bool
 {
     return !(lhs == rhs);
 }
 
-constexpr auto operator<(uint128 lhs, uint128 rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<(uint128 lhs, uint128 rhs) noexcept -> bool
 {
     if (lhs.high == rhs.high)
     {
@@ -688,60 +703,60 @@ constexpr auto operator<(uint128 lhs, uint128 rhs) noexcept -> bool
     return lhs.high < rhs.high;
 }
 
-constexpr auto operator<=(uint128 lhs, uint128 rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(uint128 lhs, uint128 rhs) noexcept -> bool
 {
     return !(rhs < lhs);
 }
 
-constexpr auto operator>(uint128 lhs, uint128 rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>(uint128 lhs, uint128 rhs) noexcept -> bool
 {
     return rhs < lhs;
 }
 
-constexpr auto operator>=(uint128 lhs, uint128 rhs) noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(uint128 lhs, uint128 rhs) noexcept -> bool
 {
     return !(lhs < rhs);
 }
 
-constexpr auto operator~(uint128 v) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator~(uint128 v) noexcept -> uint128
 {
     return {~v.high, ~v.low};
 }
 
-constexpr auto operator|(uint128 lhs, uint128 rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator|(uint128 lhs, uint128 rhs) noexcept -> uint128
 {
     return {lhs.high | rhs.high, lhs.low | rhs.low};
 }
 
-constexpr auto uint128::operator|=(uint128 v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator|=(uint128 v) noexcept -> uint128&
 {
     *this = *this | v;
     return *this;
 }
 
-constexpr auto operator&(uint128 lhs, uint128 rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator&(uint128 lhs, uint128 rhs) noexcept -> uint128
 {
     return {lhs.high & rhs.high, lhs.low & rhs.low};
 }
 
-constexpr auto uint128::operator&=(uint128 v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator&=(uint128 v) noexcept -> uint128&
 {
     *this = *this & v;
     return *this;
 }
 
-constexpr auto operator^(uint128 lhs, uint128 rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator^(uint128 lhs, uint128 rhs) noexcept -> uint128
 {
     return {lhs.high ^ rhs.high, lhs.low ^ rhs.low};
 }
 
-constexpr auto uint128::operator^=(uint128 v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator^=(uint128 v) noexcept -> uint128&
 {
     *this = *this ^ v;
     return *this;
 }
 
-constexpr auto uint128::operator+=(std::uint64_t n) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator+=(std::uint64_t n) noexcept -> uint128&
 {
     const std::uint64_t new_low { low + n };
 
@@ -755,7 +770,7 @@ constexpr auto uint128::operator+=(std::uint64_t n) noexcept -> uint128&
     return *this;
 }
 
-constexpr auto operator+(uint128 lhs, uint128 rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(uint128 lhs, uint128 rhs) noexcept -> uint128
 {
     #if (defined(BOOST_DECIMAL_HAS_X64_INTRINSICS) || defined(BOOST_DECIMAL_HAS_MSVC_64BIT_INTRINSICS)) && !defined(BOOST_DECIMAL_NO_CONSTEVAL_DETECTION)
     if (!BOOST_DECIMAL_IS_CONSTANT_EVALUATED(lhs.low))
@@ -784,13 +799,13 @@ constexpr auto operator+(uint128 lhs, uint128 rhs) noexcept -> uint128
     }
 }
 
-constexpr auto uint128::operator+=(uint128 v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator+=(uint128 v) noexcept -> uint128& // in-place add: assigns *this + v and returns *this
 {
     *this = *this + v;
     return *this;
 }
 
-constexpr auto uint128::operator++() noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator++() noexcept -> uint128&
 {
     if (++low == UINT64_C(0))
     {
@@ -800,12 +815,12 @@ constexpr auto uint128::operator++() noexcept -> uint128&
     return *this;
 }
 
-constexpr auto uint128::operator++(int) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator++(int) noexcept -> uint128 // postfix ++: increments *this and returns the value it held before the increment
 {
-    return ++(*this);
+    const auto previous = *this; ++(*this); return previous; // snapshot first: prefix ++ yields the already-incremented value
 }
 
-constexpr auto operator-(uint128 lhs, uint128 rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(uint128 lhs, uint128 rhs) noexcept -> uint128
 {
     #if (defined(BOOST_DECIMAL_HAS_X64_INTRINSICS) || defined(BOOST_DECIMAL_HAS_MSVC_64BIT_INTRINSICS)) && !defined(BOOST_DECIMAL_NO_CONSTEVAL_DETECTION)
     if (!BOOST_DECIMAL_IS_CONSTANT_EVALUATED(lhs.low))
@@ -834,13 +849,13 @@ constexpr auto operator-(uint128 lhs, uint128 rhs) noexcept -> uint128
     }
 }
 
-constexpr auto uint128::operator-=(uint128 v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator-=(uint128 v) noexcept -> uint128& // in-place subtract: assigns *this - v and returns *this
 {
     *this = *this - v;
     return *this;
 }
 
-constexpr auto uint128::operator--() noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator--() noexcept -> uint128&
 {
     if (this->low == 0)
     {
@@ -855,20 +870,20 @@ constexpr auto uint128::operator--() noexcept -> uint128&
     return *this;
 }
 
-constexpr auto uint128::operator--(int) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator--(int) noexcept -> uint128 // postfix --: decrements *this and returns the value it held before the decrement
 {
-    return --(*this);
+    const auto previous = *this; --(*this); return previous; // snapshot first: prefix -- yields the already-decremented value
 }
 
 using wide_integer_uint128 = ::boost::decimal::math::wide_integer::uint128_t;
 
-constexpr auto uint128_to_wide_integer(const uint128& src) -> wide_integer_uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128_to_wide_integer(const uint128& src) -> wide_integer_uint128
 {
     wide_integer_uint128 dst { };
 
     using local_limb_type = typename wide_integer_uint128::limb_type;
 
-    static_assert(sizeof(local_limb_type) == static_cast<std::size_t>(UINT8_C(4)) && std::is_same<local_limb_type, std::uint32_t>::value, "Error: Configuration of external wide-integer limbs not OK");
+    static_assert(sizeof(local_limb_type) == static_cast<std::size_t>(UINT8_C(4)) && BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::is_same<local_limb_type, std::uint32_t>::value, "Error: Configuration of external wide-integer limbs not OK");
 
     dst.representation()[static_cast<std::size_t>(UINT8_C(0))] = static_cast<local_limb_type>(src.low);
     dst.representation()[static_cast<std::size_t>(UINT8_C(1))] = static_cast<local_limb_type>(src.low >> static_cast<unsigned>(UINT8_C(32)));
@@ -878,7 +893,7 @@ constexpr auto uint128_to_wide_integer(const uint128& src) -> wide_integer_uint1
     return dst;
 }
 
-constexpr auto wide_integer_to_uint128(const wide_integer_uint128& src) -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto wide_integer_to_uint128(const wide_integer_uint128& src) -> uint128
 {
     uint128 dst { };
 
@@ -905,7 +920,7 @@ constexpr auto wide_integer_to_uint128(const wide_integer_uint128& src) -> uint1
     return dst;
 }
 
-constexpr auto operator*(uint128 lhs, uint128 rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*(uint128 lhs, uint128 rhs) noexcept -> uint128
 {
     const auto a = static_cast<std::uint64_t>(lhs.low >> 32);
     const auto b = static_cast<std::uint64_t>(lhs.low & UINT32_MAX);
@@ -918,7 +933,7 @@ constexpr auto operator*(uint128 lhs, uint128 rhs) noexcept -> uint128
     return result;
 }
 
-constexpr auto operator*(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator*(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
 {
     using local_unsigned_fast_type = ::boost::decimal::math::wide_integer::detail::unsigned_fast_type;
 
@@ -950,41 +965,33 @@ constexpr auto operator*(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
     return wide_integer_to_uint128(result_wide);
 }
 
-constexpr auto uint128::operator*=(uint128 v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator*=(uint128 v) noexcept -> uint128& // in-place multiply: assigns *this * v and returns *this
 {
     *this = *this * v;
     return *this;
 }
 
-constexpr auto uint128::operator*=(std::uint64_t v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator*=(std::uint64_t v) noexcept -> uint128& // in-place multiply by a 64-bit factor: assigns *this * v and returns *this
 {
     *this = *this * v;
     return *this;
 }
 
-constexpr auto high_bit(uint128 v) noexcept -> int
+BOOST_DECIMAL_GPU_ENABLED constexpr auto high_bit(uint128 v) noexcept -> int // index derived from countl_zero of the first non-zero half (high checked first); returns 0 when v is zero
 {
     if (v.high != 0)
     {
-        #ifdef BOOST_DECIMAL_HAS_STDBIT
-        return 127 - std::countl_zero(v.high);
-        #else
         return 127 - countl_zero(v.high);
-        #endif
     }
     else if (v.low != 0)
     {
-        #ifdef BOOST_DECIMAL_HAS_STDBIT
-        return 63 - std::countl_zero(v.low);
-        #else
         return 63 - countl_zero(v.low);
-        #endif
     }
 
     return 0;
 }
 
-constexpr auto div_impl(uint128 lhs, uint128 rhs, uint128& quotient, uint128& remainder) noexcept -> void
+BOOST_DECIMAL_GPU_ENABLED constexpr auto div_impl(uint128 lhs, uint128 rhs, uint128& quotient, uint128& remainder) noexcept -> void
 {
     if ((rhs.high == UINT64_C(0)) && (rhs.low < (static_cast<std::uint64_t>(UINT64_C(0x100000000)))) && (rhs.low > (static_cast<std::uint64_t>(UINT64_C(0x0)))))
     {
@@ -1023,7 +1030,7 @@ constexpr auto div_impl(uint128 lhs, uint128 rhs, uint128& quotient, uint128& re
     }
 }
 
-constexpr auto operator/(uint128 lhs, uint128 rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/(uint128 lhs, uint128 rhs) noexcept -> uint128
 {
     uint128 quotient {0, 0};
     uint128 remainder {0, 0};
@@ -1032,7 +1039,7 @@ constexpr auto operator/(uint128 lhs, uint128 rhs) noexcept -> uint128
     return quotient;
 }
 
-constexpr auto operator/(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator/(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
 {
     uint128 quotient {0, 0};
     uint128 remainder {0, 0};
@@ -1041,13 +1048,13 @@ constexpr auto operator/(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
     return quotient;
 }
 
-constexpr auto uint128::operator/=(uint128 v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator/=(uint128 v) noexcept -> uint128& // in-place divide: assigns *this / v and returns *this
 {
     *this = *this / v;
     return *this;
 }
 
-constexpr auto operator%(uint128 lhs, uint128 rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator%(uint128 lhs, uint128 rhs) noexcept -> uint128
 {
     uint128 quotient {0, 0};
     uint128 remainder {0, 0};
@@ -1056,7 +1063,7 @@ constexpr auto operator%(uint128 lhs, uint128 rhs) noexcept -> uint128
     return remainder;
 }
 
-constexpr auto operator%(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator%(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
 {
     uint128 quotient {0, 0};
     uint128 remainder {0, 0};
@@ -1065,19 +1072,19 @@ constexpr auto operator%(uint128 lhs, std::uint64_t rhs) noexcept -> uint128
     return remainder;
 }
 
-constexpr auto uint128::operator%=(uint128 v) noexcept -> uint128&
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint128::operator%=(uint128 v) noexcept -> uint128& // in-place remainder: assigns *this % v and returns *this
 {
     *this = *this % v;
     return *this;
 }
 
-constexpr auto umul64(std::uint32_t x, std::uint32_t y) noexcept -> std::uint64_t
+BOOST_DECIMAL_GPU_ENABLED constexpr auto umul64(std::uint32_t x, std::uint32_t y) noexcept -> std::uint64_t // full 64-bit product of two 32-bit values; y is widened before the multiply
 {
     return x * static_cast<std::uint64_t>(y);
 }
 
 // Get 128-bit result of multiplication of two 64-bit unsigned integers.
-constexpr auto umul128(std::uint64_t x, std::uint64_t y) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto umul128(std::uint64_t x, std::uint64_t y) noexcept -> uint128
 {
     #if defined(BOOST_DECIMAL_HAS_INT128)
 
@@ -1104,7 +1111,7 @@ constexpr auto umul128(std::uint64_t x, std::uint64_t y) noexcept -> uint128
     #endif
 }
 
-constexpr auto umul128_upper64(std::uint64_t x, std::uint64_t y) noexcept -> std::uint64_t
+BOOST_DECIMAL_GPU_ENABLED constexpr auto umul128_upper64(std::uint64_t x, std::uint64_t y) noexcept -> std::uint64_t
 {
     #if defined(BOOST_DECIMAL_HAS_INT128)
     
@@ -1132,7 +1139,7 @@ constexpr auto umul128_upper64(std::uint64_t x, std::uint64_t y) noexcept -> std
 
 // Get upper 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit
 // unsigned integer.
-constexpr auto umul192_upper128(std::uint64_t x, uint128 y) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto umul192_upper128(std::uint64_t x, uint128 y) noexcept -> uint128
 {
     auto r = umul128(x, y.high);
     r += umul128_upper64(x, y.low);
@@ -1141,7 +1148,7 @@ constexpr auto umul192_upper128(std::uint64_t x, uint128 y) noexcept -> uint128
 
 // Get upper 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit
 // unsigned integer.
-constexpr auto umul96_upper64(std::uint32_t x, std::uint64_t y) noexcept -> std::uint64_t
+BOOST_DECIMAL_GPU_ENABLED constexpr auto umul96_upper64(std::uint32_t x, std::uint64_t y) noexcept -> std::uint64_t
 {
     #if defined(BOOST_DECIMAL_HAS_INT128) || defined(BOOST_DECIMAL_HAS_MSVC_64BIT_INTRINSICS)
     
@@ -1162,7 +1169,7 @@ constexpr auto umul96_upper64(std::uint32_t x, std::uint64_t y) noexcept -> std:
 
 // Get lower 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit
 // unsigned integer.
-constexpr auto umul192_lower128(std::uint64_t x, uint128 y) noexcept -> uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto umul192_lower128(std::uint64_t x, uint128 y) noexcept -> uint128
 {
     auto high = x * y.high;
     auto highlow = umul128(x, y.low);
@@ -1171,7 +1178,7 @@ constexpr auto umul192_lower128(std::uint64_t x, uint128 y) noexcept -> uint128
 
 // Get lower 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit
 // unsigned integer.
-constexpr auto umul96_lower64(std::uint32_t x, std::uint64_t y) noexcept -> std::uint64_t
+BOOST_DECIMAL_GPU_ENABLED constexpr auto umul96_lower64(std::uint32_t x, std::uint64_t y) noexcept -> std::uint64_t // x converts to 64 bits; unsigned multiply wraps, keeping only the low 64 bits
 {
     return x * y;
 }
@@ -1223,12 +1230,12 @@ inline auto operator<<(std::basic_ostream<charT, traits>& os, int128 val) -> std
 }
 #endif
 
-constexpr int128::operator uint128() const noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr int128::operator uint128() const noexcept // casts the high half to std::uint64_t and copies the low half unchanged
 {
     return {static_cast<std::uint64_t>(this->high), this->low};
 }
 
-constexpr auto operator-(int128 rhs) noexcept -> int128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(int128 rhs) noexcept -> int128
 {
     const auto new_low {~rhs.low + 1};
     const auto carry {static_cast<std::int64_t>(new_low == 0)};
@@ -1236,7 +1243,7 @@ constexpr auto operator-(int128 rhs) noexcept -> int128
     return int128{new_high, new_low};
 }
 
-constexpr auto int128::operator<(int128 rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator<(int128 rhs) const noexcept -> bool
 {
     if (high == rhs.high)
     {
@@ -1247,34 +1254,34 @@ constexpr auto int128::operator<(int128 rhs) const noexcept -> bool
 }
 
 // Greater-than operator
-constexpr auto int128::operator>(int128 rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator>(int128 rhs) const noexcept -> bool // evaluated as rhs < *this
 {
     return rhs < *this;
 }
 
 // Less-than or equal-to operator
-constexpr auto int128::operator<=(int128 rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator<=(int128 rhs) const noexcept -> bool // negation of operator>
 {
     return !(*this > rhs);
 }
 
 // Greater-than or equal-to operator
-constexpr auto int128::operator>=(int128 rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator>=(int128 rhs) const noexcept -> bool // negation of operator<
 {
     return !(*this < rhs);
 }
 
-constexpr auto int128::operator==(int128 rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator==(int128 rhs) const noexcept -> bool // equal only when both the high and the low halves match
 {
     return this->high == rhs.high && this->low == rhs.low;
 }
 
-constexpr auto int128::operator!=(int128 rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator!=(int128 rhs) const noexcept -> bool // negation of operator==
 {
     return !(*this == rhs);
 }
 
-constexpr auto int128::operator==(std::int64_t rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator==(std::int64_t rhs) const noexcept -> bool
 {
     if (high == 0 && low == static_cast<std::uint64_t>(rhs))
     {
@@ -1288,7 +1295,7 @@ constexpr auto int128::operator==(std::int64_t rhs) const noexcept -> bool
     return false;
 }
 
-constexpr auto int128::operator<(std::int64_t rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator<(std::int64_t rhs) const noexcept -> bool
 {
     if (high < 0 && rhs >= 0)
     {
@@ -1306,27 +1313,27 @@ constexpr auto int128::operator<(std::int64_t rhs) const noexcept -> bool
     return false;
 }
 
-constexpr auto int128::operator>(std::int64_t rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator>(std::int64_t rhs) const noexcept -> bool // greater means neither equal to rhs nor less than rhs
 {
     return !(*this == rhs) && !(*this < rhs);
 }
 
-constexpr auto int128::operator==(int rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator==(int rhs) const noexcept -> bool // widens rhs to std::int64_t and compares with ==
 {
     return *this == static_cast<std::int64_t>(rhs);
 }
 
-constexpr auto int128::operator<(int rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator<(int rhs) const noexcept -> bool // widens rhs to std::int64_t and compares with <
 {
     return *this < static_cast<std::int64_t>(rhs);
 }
 
-constexpr auto int128::operator>(int rhs) const noexcept -> bool
+BOOST_DECIMAL_GPU_ENABLED constexpr auto int128::operator>(int rhs) const noexcept -> bool // widens rhs to std::int64_t and compares with >
 {
     return *this > static_cast<std::int64_t>(rhs);
 }
 
-constexpr auto operator+(const int128& lhs, const int128& rhs) noexcept -> int128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(const int128& lhs, const int128& rhs) noexcept -> int128
 {
     #if (defined(BOOST_DECIMAL_HAS_X64_INTRINSICS) || defined(BOOST_DECIMAL_HAS_MSVC_64BIT_INTRINSICS)) && !defined(BOOST_DECIMAL_NO_CONSTEVAL_DETECTION)
     if (!BOOST_DECIMAL_IS_CONSTANT_EVALUATED(lhs.low))
@@ -1359,7 +1366,7 @@ constexpr auto operator+(const int128& lhs, const int128& rhs) noexcept -> int12
     }
 }
 
-constexpr auto operator-(const int128& lhs, const int128& rhs) noexcept -> int128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(const int128& lhs, const int128& rhs) noexcept -> int128
 {
     const auto new_low {lhs.low - rhs.low};
     const auto new_high {lhs.high - rhs.high - static_cast<std::int64_t>(lhs.low < rhs.low)};
@@ -1418,15 +1425,15 @@ struct numeric_limits<boost::decimal::detail::uint128>
     BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr bool tinyness_before = false;
 
     // Member functions
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto (min)        () -> boost::decimal::detail::uint128 { return 0; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto lowest       () -> boost::decimal::detail::uint128 { return 0; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto (max)        () -> boost::decimal::detail::uint128 { return {UINT64_MAX, UINT64_MAX}; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto epsilon      () -> boost::decimal::detail::uint128 { return 0; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto round_error  () -> boost::decimal::detail::uint128 { return 0; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto infinity     () -> boost::decimal::detail::uint128 { return 0; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto quiet_NaN    () -> boost::decimal::detail::uint128 { return 0; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto signaling_NaN() -> boost::decimal::detail::uint128 { return 0; }
-    BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto denorm_min   () -> boost::decimal::detail::uint128 { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto (min)        () -> boost::decimal::detail::uint128 { return 0; } // parenthesised name cannot be expanded by a function-like min macro
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto lowest       () -> boost::decimal::detail::uint128 { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto (max)        () -> boost::decimal::detail::uint128 { return {UINT64_MAX, UINT64_MAX}; } // both halves set to UINT64_MAX
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto epsilon      () -> boost::decimal::detail::uint128 { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto round_error  () -> boost::decimal::detail::uint128 { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto infinity     () -> boost::decimal::detail::uint128 { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto quiet_NaN    () -> boost::decimal::detail::uint128 { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto signaling_NaN() -> boost::decimal::detail::uint128 { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_ATTRIBUTE_UNUSED static constexpr auto denorm_min   () -> boost::decimal::detail::uint128 { return 0; }
 };
 
 } // namespace std
diff --git a/include/boost/decimal/detail/emulated256.hpp b/include/boost/decimal/detail/emulated256.hpp
index 55df7630a..a638ad992 100644
--- a/include/boost/decimal/detail/emulated256.hpp
+++ b/include/boost/decimal/detail/emulated256.hpp
@@ -7,105 +7,109 @@
 
 #include <boost/decimal/detail/config.hpp>
 #include <boost/decimal/detail/emulated128.hpp>
+#include <boost/decimal/detail/tuple.hpp>
 #include <boost/decimal/detail/wide-integer/uintwide_t.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
 #include <cstdint>
 #include <cmath>
-#include <tuple>
 #endif
 
 namespace boost {
 namespace decimal {
 namespace detail {
 
+#ifdef BOOST_DECIMAL_ENABLE_CUDA
+#  pragma nv_diag_suppress 20012 // NOTE(review): presumably NVCC's "annotation ignored on an explicitly defaulted function" warning for the = default members below - confirm
+#endif
+
 struct uint256_t
 {
     uint128 high {};
     uint128 low {};
 
-    constexpr uint256_t() = default;
-    constexpr uint256_t& operator=(const uint256_t& rhs) = default;
-    constexpr uint256_t(const uint256_t& rhs) = default;
-    explicit constexpr uint256_t(const uint128& rhs) : high {}, low {rhs} {}
-    constexpr uint256_t(const uint128& high_, const uint128& low_) : high {high_}, low {low_} {}
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t() = default; // members take their {} default member initializers
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t& operator=(const uint256_t& rhs) = default;
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t(const uint256_t& rhs) = default;
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr uint256_t(const uint128& rhs) : high {}, low {rhs} {} // widening: high is value-initialized, low takes rhs
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t(const uint128& high_, const uint128& low_) : high {high_}, low {low_} {} // argument order is (high, low)
 
-    explicit operator uint128() const noexcept
+    BOOST_DECIMAL_GPU_ENABLED explicit constexpr operator uint128() const noexcept // truncating conversion: returns only the low half; constexpr like the rest of the type
     { 
         return this->low; 
     }
 
-    explicit operator std::size_t() const noexcept
+    BOOST_DECIMAL_GPU_ENABLED explicit operator std::size_t() const noexcept // truncating conversion: only the low half is converted to std::size_t
     {
         return static_cast<std::size_t>(this->low);
     }
 
 
-    friend constexpr uint256_t operator>>(uint256_t lhs, int amount) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator>>(uint256_t lhs, int amount) noexcept;
 
-    constexpr uint256_t &operator>>=(int amount) noexcept
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t &operator>>=(int amount) noexcept // in-place form: assigns *this >> amount and returns *this
     {
         *this = *this >> amount;
         return *this;
     }
 
-    friend constexpr uint256_t operator<<(uint256_t lhs, int amount) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator<<(uint256_t lhs, int amount) noexcept;
 
-    constexpr uint256_t &operator<<=(int amount) noexcept
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t &operator<<=(int amount) noexcept // in-place form: assigns *this << amount and returns *this
     {
         *this = *this << amount;
         return *this;
     }
 
-    friend constexpr uint256_t operator|(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator|(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    constexpr uint256_t &operator|=(uint256_t v) noexcept
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t &operator|=(uint256_t v) noexcept // in-place form: assigns *this | v and returns *this
     {
         *this = *this | v;
         return *this;
     }
 
-    friend constexpr uint256_t operator&(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator&(const uint256_t& lhs, const uint256_t& rhs) noexcept; // declaration only: no body inside the struct
 
-    friend constexpr uint256_t operator&(uint256_t lhs, uint128 rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator&(uint256_t lhs, uint128 rhs) noexcept;
 
-    friend constexpr bool operator==(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr bool operator==(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr bool operator==(uint256_t lhs, std::uint64_t rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr bool operator==(uint256_t lhs, std::uint64_t rhs) noexcept;
 
-    friend constexpr bool operator!=(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr bool operator!=(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr bool operator<(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr bool operator<(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr bool operator<=(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr bool operator<=(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr bool operator>(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr bool operator>(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr bool operator>=(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr bool operator>=(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr uint256_t operator+(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator+(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr uint256_t operator+(uint256_t lhs, uint128 rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator+(uint256_t lhs, uint128 rhs) noexcept;
 
-    friend constexpr uint256_t operator*(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator*(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr uint256_t operator*(const uint256_t& lhs, const std::uint64_t rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator*(const uint256_t& lhs, const std::uint64_t rhs) noexcept;
 
-    friend constexpr uint256_t operator-(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator-(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    constexpr uint256_t &operator-=(uint256_t v) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t &operator-=(uint256_t v) noexcept; // member declaration: the body is not inline
 
-    friend constexpr uint256_t operator/(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator/(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr uint256_t operator/(const uint256_t& lhs, std::uint64_t rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator/(const uint256_t& lhs, std::uint64_t rhs) noexcept;
 
-    constexpr uint256_t& operator/=(std::uint64_t rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t& operator/=(std::uint64_t rhs) noexcept; // member declaration: the body is not inline
 
-    constexpr uint256_t& operator/=(const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t& operator/=(const uint256_t& rhs) noexcept; // member declaration: the body is not inline
 
-    friend constexpr uint256_t operator%(const uint256_t& lhs, const uint256_t& rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator%(const uint256_t& lhs, const uint256_t& rhs) noexcept;
 
-    friend constexpr uint256_t operator%(uint256_t lhs, std::uint64_t rhs) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr uint256_t operator%(uint256_t lhs, std::uint64_t rhs) noexcept;
 
     #if !defined(BOOST_DECIMAL_DISABLE_IOSTREAM)
     template <typename charT, typename traits>
@@ -113,10 +117,14 @@ struct uint256_t
     #endif
 
 private:
-    friend constexpr int high_bit(uint256_t v) noexcept;
+    BOOST_DECIMAL_GPU_ENABLED friend constexpr int high_bit(uint256_t v) noexcept;
 };
 
-constexpr uint256_t operator>>(uint256_t lhs, int amount) noexcept
+#ifdef BOOST_DECIMAL_ENABLE_CUDA
+#  pragma nv_diag_default 20012 // NOTE(review): presumably returns diagnostic 20012 to its default severity from here on - confirm against the NVCC pragma docs
+#endif
+
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator>>(uint256_t lhs, int amount) noexcept
 {
     if (amount >= 128)
     {
@@ -130,7 +138,7 @@ constexpr uint256_t operator>>(uint256_t lhs, int amount) noexcept
     return {lhs.high >> amount, (lhs.low >> amount) | (lhs.high << (128 - amount))};
 }
 
-constexpr uint256_t operator<<(uint256_t lhs, int amount) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator<<(uint256_t lhs, int amount) noexcept
 {
     if (amount >= 128)
     {
@@ -144,37 +152,37 @@ constexpr uint256_t operator<<(uint256_t lhs, int amount) noexcept
     return {(lhs.high << amount) | (lhs.low >> (128 - amount)), lhs.low << amount};
 }
 
-constexpr uint256_t operator|(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator|(const uint256_t& lhs, const uint256_t& rhs) noexcept // bitwise OR, applied to the high and low halves independently
 {
     return {lhs.high | rhs.high, lhs.low | rhs.low};
 }
 
-constexpr uint256_t operator&(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator&(const uint256_t& lhs, const uint256_t& rhs) noexcept // bitwise AND, applied to the high and low halves independently
 {
     return {lhs.high & rhs.high, lhs.low & rhs.low};
 }
 
-constexpr uint256_t operator&(uint256_t lhs, uint128 rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator&(uint256_t lhs, uint128 rhs) noexcept // bitwise AND of lhs with rhs zero-extended to 256 bits
 {
-    return {lhs.high, lhs.low & rhs.low};
+    return uint256_t {lhs.low & rhs}; // upper half ANDs with zero so it is cleared; the low half is masked with all 128 bits of rhs
 }
 
-constexpr bool operator==(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr bool operator==(const uint256_t& lhs, const uint256_t& rhs) noexcept // equal only when both the high and the low halves match
 {
     return lhs.high == rhs.high && lhs.low == rhs.low;
 }
 
-constexpr bool operator==(uint256_t lhs, std::uint64_t rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr bool operator==(uint256_t lhs, std::uint64_t rhs) noexcept // true when lhs equals rhs zero-extended to 256 bits
 {
-    return lhs.high == 0 && rhs != 0 && lhs.low == rhs;
+    return lhs.high == 0 && lhs.low == rhs; // no rhs != 0 guard: a zero lhs must compare equal to 0
 }
 
-constexpr bool operator!=(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr bool operator!=(const uint256_t& lhs, const uint256_t& rhs) noexcept // true when either half differs
 {
     return !(lhs.high == rhs.high && lhs.low == rhs.low);
 }
 
-constexpr bool operator<(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr bool operator<(const uint256_t& lhs, const uint256_t& rhs) noexcept
 {
     if (lhs.high == rhs.high)
     {
@@ -184,22 +192,22 @@ constexpr bool operator<(const uint256_t& lhs, const uint256_t& rhs) noexcept
     return lhs.high < rhs.high;
 }
 
-constexpr bool operator<=(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr bool operator<=(const uint256_t& lhs, const uint256_t& rhs) noexcept // evaluated as !(rhs < lhs)
 {
     return !(rhs < lhs);
 }
 
-constexpr bool operator>(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr bool operator>(const uint256_t& lhs, const uint256_t& rhs) noexcept // evaluated as rhs < lhs
 {
     return rhs < lhs;
 }
 
-constexpr bool operator>=(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr bool operator>=(const uint256_t& lhs, const uint256_t& rhs) noexcept // evaluated as !(lhs < rhs)
 {
     return !(lhs < rhs);
 }
 
-constexpr uint256_t operator+(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator+(const uint256_t& lhs, const uint256_t& rhs) noexcept
 {
     const uint256_t temp = {lhs.high + rhs.high, lhs.low + rhs.low};
 
@@ -212,7 +220,7 @@ constexpr uint256_t operator+(const uint256_t& lhs, const uint256_t& rhs) noexce
     return temp;
 }
 
-constexpr uint256_t operator+(uint256_t lhs, uint128 rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator+(uint256_t lhs, uint128 rhs) noexcept
 {
     const uint256_t temp = {lhs.high, lhs.low + rhs};
 
@@ -224,7 +232,7 @@ constexpr uint256_t operator+(uint256_t lhs, uint128 rhs) noexcept
     return temp;
 }
 
-constexpr uint256_t operator-(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator-(const uint256_t& lhs, const uint256_t& rhs) noexcept
 {
     const uint256_t temp {lhs.high - rhs.high, lhs.low - rhs.low};
 
@@ -237,13 +245,13 @@ constexpr uint256_t operator-(const uint256_t& lhs, const uint256_t& rhs) noexce
     return temp;
 }
 
-constexpr uint256_t &uint256_t::operator-=(uint256_t v) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t &uint256_t::operator-=(uint256_t v) noexcept // in-place subtract: assigns *this - v and returns *this
 {
     *this = *this - v;
     return *this;
 }
 
-constexpr int high_bit(uint256_t v) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr int high_bit(uint256_t v) noexcept
 {
     if (v.high != 0)
     {
@@ -258,7 +266,7 @@ constexpr int high_bit(uint256_t v) noexcept
 }
 
 // Function to compare two uint256_t numbers (returns -1, 0, or 1)
-constexpr int compare(const uint256_t& a, const uint256_t& b)
+BOOST_DECIMAL_GPU_ENABLED constexpr int compare(const uint256_t& a, const uint256_t& b)
 {
     if (a.high < b.high || (a.high == b.high && a.low < b.low))
     {
@@ -274,7 +282,7 @@ constexpr int compare(const uint256_t& a, const uint256_t& b)
 
 // The following are all needed for the division algorithm
 // Function to subtract two uint256_t numbers
-constexpr uint256_t subtract(const uint256_t& a, const uint256_t& b)
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t subtract(const uint256_t& a, const uint256_t& b)
 {
     uint256_t result;
     result.low = a.low - b.low;
@@ -288,7 +296,7 @@ constexpr uint256_t subtract(const uint256_t& a, const uint256_t& b)
 }
 
 // Function to left shift a uint256_t by one bit
-constexpr uint256_t left_shift(const uint256_t& a)
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t left_shift(const uint256_t& a)
 {
     uint256_t result;
     result.high = (a.high << 1) | (a.low >> (sizeof(uint128) * 8 - 1));
@@ -297,7 +305,7 @@ constexpr uint256_t left_shift(const uint256_t& a)
 }
 
 // Function to set a specific bit of a uint256_t
-constexpr void set_bit(uint256_t& a, int bit)
+BOOST_DECIMAL_GPU_ENABLED constexpr void set_bit(uint256_t& a, int bit)
 {
     if (bit >= 0 && bit < 128)
     {
@@ -311,7 +319,7 @@ constexpr void set_bit(uint256_t& a, int bit)
 
 using wide_integer_uint256 = ::boost::decimal::math::wide_integer::uint256_t;
 
-constexpr auto uint256_to_wide_integer(const uint256_t& src) -> wide_integer_uint256
+BOOST_DECIMAL_GPU_ENABLED constexpr auto uint256_to_wide_integer(const uint256_t& src) -> wide_integer_uint256
 {
     wide_integer_uint256 dst { };
 
@@ -331,7 +339,7 @@ constexpr auto uint256_to_wide_integer(const uint256_t& src) -> wide_integer_uin
     return dst;
 }
 
-constexpr auto wide_integer_to_uint256(const wide_integer_uint256& src) -> uint256_t
+BOOST_DECIMAL_GPU_ENABLED constexpr auto wide_integer_to_uint256(const wide_integer_uint256& src) -> uint256_t
 {
     uint256_t dst { };
 
@@ -378,7 +386,7 @@ constexpr auto wide_integer_to_uint256(const wide_integer_uint256& src) -> uint2
     return dst;
 }
 
-constexpr uint256_t operator*(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator*(const uint256_t& lhs, const uint256_t& rhs) noexcept
 {
     using local_unsigned_fast_type = ::boost::decimal::math::wide_integer::detail::unsigned_fast_type;
 
@@ -397,7 +405,7 @@ constexpr uint256_t operator*(const uint256_t& lhs, const uint256_t& rhs) noexce
     return wide_integer_to_uint256(result_wide);
 }
 
-constexpr uint256_t operator*(const uint256_t& lhs, const std::uint64_t rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator*(const uint256_t& lhs, const std::uint64_t rhs) noexcept
 {
     using local_unsigned_fast_type = ::boost::decimal::math::wide_integer::detail::unsigned_fast_type;
 
@@ -430,10 +438,10 @@ constexpr uint256_t operator*(const uint256_t& lhs, const std::uint64_t rhs) noe
 }
 
 // Forward declaration of specialized division 256-bits / 64-bits.
-constexpr std::tuple<uint256_t, uint256_t> divide_with_rem(const uint256_t& dividend, const std::uint64_t& divisor) noexcept;
+BOOST_DECIMAL_GPU_ENABLED constexpr boost::decimal::tuple<uint256_t, uint256_t> divide_with_rem(const uint256_t& dividend, const std::uint64_t& divisor) noexcept;
 
 // The division algorithm
-constexpr std::tuple<uint256_t, uint256_t> divide(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr boost::decimal::tuple<uint256_t, uint256_t> divide(const uint256_t& lhs, const uint256_t& rhs) noexcept
 {
     if ((rhs.high.high == UINT64_C(0)) && (rhs.high.low == UINT64_C(0)) && (rhs.low.high == UINT64_C(0)) && (rhs.low.low < (static_cast<std::uint64_t>(UINT64_C(0x100000000)))) && (rhs.low.low > (static_cast<std::uint64_t>(UINT64_C(0)))))
     {
@@ -457,7 +465,7 @@ constexpr std::tuple<uint256_t, uint256_t> divide(const uint256_t& lhs, const ui
     }
 }
 
-constexpr std::tuple<uint256_t, uint256_t> divide_with_rem(const uint256_t& dividend, const std::uint64_t& divisor) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr boost::decimal::tuple<uint256_t, uint256_t> divide_with_rem(const uint256_t& dividend, const std::uint64_t& divisor) noexcept
 {
     uint256_t quotient { { 0U, 0U }, { 0U, 0U }};
 
@@ -483,13 +491,13 @@ constexpr std::tuple<uint256_t, uint256_t> divide_with_rem(const uint256_t& divi
     };
 }
 
-constexpr uint256_t operator/(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator/(const uint256_t& lhs, const uint256_t& rhs) noexcept // Returns element 0 of the tuple produced by divide(lhs, rhs)
 {
     const auto res {divide(lhs, rhs)};
-    return std::get<0>(res);
+    return boost::decimal::get<0>(res); // divide() now returns boost::decimal::tuple, so the matching boost::decimal::get is used
 }
 
-constexpr uint256_t operator/(const uint256_t& lhs, std::uint64_t rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator/(const uint256_t& lhs, std::uint64_t rhs) noexcept
 {
     // Same code as divide_with_rem but skips the modulus step
 
@@ -513,32 +521,32 @@ constexpr uint256_t operator/(const uint256_t& lhs, std::uint64_t rhs) noexcept
     return quotient;
 }
 
-constexpr uint256_t& uint256_t::operator/=(std::uint64_t rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t& uint256_t::operator/=(std::uint64_t rhs) noexcept // In-place division by a 64-bit value: assigns (*this / rhs) to *this and returns *this
 {
     *this = *this / rhs;
     return *this;
 }
 
-constexpr uint256_t& uint256_t::operator/=(const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t& uint256_t::operator/=(const uint256_t& rhs) noexcept // In-place division by a uint256_t: assigns (*this / rhs) to *this and returns *this
 {
     *this = *this / rhs;
     return *this;
 }
 
-constexpr uint256_t operator%(const uint256_t& lhs, const uint256_t& rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator%(const uint256_t& lhs, const uint256_t& rhs) noexcept // Returns element 1 of the tuple produced by divide(lhs, rhs)
 {
     const auto res {divide(lhs, rhs)};
-    return std::get<1>(res);
+    return boost::decimal::get<1>(res); // divide() now returns boost::decimal::tuple, so the matching boost::decimal::get is used
 }
 
-constexpr uint256_t operator%(uint256_t lhs, std::uint64_t rhs) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t operator%(uint256_t lhs, std::uint64_t rhs) noexcept // Widens rhs to uint256_t, then returns element 1 of the tuple produced by divide()
 {
     const auto res {divide(lhs, uint256_t(rhs))};
-    return std::get<1>(res);
+    return boost::decimal::get<1>(res); // divide() now returns boost::decimal::tuple, so the matching boost::decimal::get is used
 }
 
 // Get the 256-bit result of multiplication of two 128-bit unsigned integers
-constexpr uint256_t umul256_impl(std::uint64_t a_high, std::uint64_t a_low, std::uint64_t b_high, std::uint64_t b_low) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t umul256_impl(std::uint64_t a_high, std::uint64_t a_low, std::uint64_t b_high, std::uint64_t b_low) noexcept
 {
     #ifdef BOOST_DECIMAL_HAS_INT128
     using unsigned_int128_type = boost::decimal::detail::uint128_t;
@@ -576,7 +584,7 @@ constexpr uint256_t umul256_impl(std::uint64_t a_high, std::uint64_t a_low, std:
 }
 
 template<typename T>
-constexpr uint256_t umul256(const T &x, const uint128 &y) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t umul256(const T &x, const uint128 &y) noexcept
 {
     static_assert(sizeof(T) == 16 && (!std::numeric_limits<T>::is_signed
             #ifdef BOOST_DECIMAL_HAS_INT128
@@ -591,7 +599,7 @@ constexpr uint256_t umul256(const T &x, const uint128 &y) noexcept
     return umul256_impl(a, b, y.high, y.low);
 }
 
-constexpr uint256_t umul256(const uint128 &x, const uint128 &y) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr uint256_t umul256(const uint128 &x, const uint128 &y) noexcept // Overload for two uint128 operands: forwards their high/low 64-bit halves to umul256_impl
 {
     return umul256_impl(x.high, x.low, y.high, y.low);
 }
@@ -635,45 +643,45 @@ template <>
 struct numeric_limits<boost::decimal::detail::uint256_t>
 {
     // Member constants
-    static constexpr bool is_specialized = true;
-    static constexpr bool is_signed = false;
-    static constexpr bool is_integer = true;
-    static constexpr bool is_exact = true;
-    static constexpr bool has_infinity = false;
-    static constexpr bool has_quiet_NaN = false;
-    static constexpr bool has_signaling_NaN = false;
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = true; // The constants below describe an unsigned, exact, 256-bit modular integer
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = false;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = true;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = true;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = false;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = false;
 
     // These members were deprecated in C++23
     #if ((!defined(_MSC_VER) && (__cplusplus <= 202002L)) || (defined(_MSC_VER) && (_MSVC_LANG <= 202002L)))
-    static constexpr std::float_denorm_style has_denorm = std::denorm_absent;
-    static constexpr bool has_denorm_loss = false;
+    BOOST_DECIMAL_STATIC constexpr std::float_denorm_style has_denorm = std::denorm_absent;
+    BOOST_DECIMAL_STATIC constexpr bool has_denorm_loss = false;
     #endif
 
-    static constexpr std::float_round_style round_style = std::round_toward_zero;
-    static constexpr bool is_iec559 = false;
-    static constexpr bool is_bounded = true;
-    static constexpr bool is_modulo = true;
-    static constexpr int digits = 256;
-    static constexpr int digits10 = 76;
-    static constexpr int max_digits10 = 0;
-    static constexpr int radix = 2;
-    static constexpr int min_exponent = 0;
-    static constexpr int min_exponent10 = 0;
-    static constexpr int max_exponent = 0;
-    static constexpr int max_exponent10 = 0;
-    static constexpr bool traps = std::numeric_limits<std::uint64_t>::traps;
-    static constexpr bool tinyness_before = false;
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::round_toward_zero;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = false;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = true;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = true;
+    BOOST_DECIMAL_STATIC constexpr int digits = 256; // Number of value bits (radix-2 digits)
+    BOOST_DECIMAL_STATIC constexpr int digits10 = 77; // floor(256 * log10(2)) = 77: every 77-digit decimal fits since 10^77 - 1 < 2^256
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = 0;
+    BOOST_DECIMAL_STATIC constexpr int radix = 2;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = 0;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = 0;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = 0;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = 0;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<std::uint64_t>::traps; // Forwarded from the underlying 64-bit word type
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = false;
 
     // Member functions
-    static constexpr boost::decimal::detail::uint256_t (min)() { return {0, 0}; }
-    static constexpr boost::decimal::detail::uint256_t lowest() { return {0, 0}; }
-    static constexpr boost::decimal::detail::uint256_t (max)() { return {{UINT64_MAX, UINT64_MAX}, {UINT64_MAX, UINT64_MAX}}; }
-    static constexpr boost::decimal::detail::uint256_t epsilon() { return {0, 0}; }
-    static constexpr boost::decimal::detail::uint256_t round_error() { return {0, 0}; }
-    static constexpr boost::decimal::detail::uint256_t infinity() { return {0, 0}; }
-    static constexpr boost::decimal::detail::uint256_t quiet_NaN() { return {0, 0}; }
-    static constexpr boost::decimal::detail::uint256_t signaling_NaN() { return {0, 0}; }
-    static constexpr boost::decimal::detail::uint256_t denorm_min() { return {0, 0}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t (min)() { return {0, 0}; } // Parenthesized name keeps a function-like min macro from expanding
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t lowest() { return {0, 0}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t (max)() { return {{UINT64_MAX, UINT64_MAX}, {UINT64_MAX, UINT64_MAX}}; } // All four 64-bit words set to UINT64_MAX
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t epsilon() { return {0, 0}; } // From here down every query returns the same {0, 0} value
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t round_error() { return {0, 0}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t infinity() { return {0, 0}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t quiet_NaN() { return {0, 0}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t signaling_NaN() { return {0, 0}; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr boost::decimal::detail::uint256_t denorm_min() { return {0, 0}; }
 };
 
 } // Namespace std
diff --git a/include/boost/decimal/detail/fenv_rounding.hpp b/include/boost/decimal/detail/fenv_rounding.hpp
index 55cb7087a..884f26a11 100644
--- a/include/boost/decimal/detail/fenv_rounding.hpp
+++ b/include/boost/decimal/detail/fenv_rounding.hpp
@@ -18,7 +18,7 @@ namespace detail {
 
 // Rounds the value provided and returns an offset of exponent values as required
 template <typename TargetType = decimal32, typename T, std::enable_if_t<is_integral_v<T>, bool> = true>
-constexpr auto fenv_round(T& val, bool = false) noexcept -> int
+BOOST_DECIMAL_GPU_ENABLED constexpr auto fenv_round(T& val, bool = false) noexcept -> int
 {
     using significand_type = std::conditional_t<std::is_same<TargetType, decimal128>::value || std::is_same<TargetType, decimal128_fast>::value, detail::uint128, int>;
 
@@ -44,7 +44,7 @@ constexpr auto fenv_round(T& val, bool = false) noexcept -> int
 #else
 
 template <typename TargetType = decimal32, typename T, std::enable_if_t<is_integral_v<T>, bool> = true>
-constexpr auto fenv_round(T& val, bool is_neg = false) noexcept -> int // NOLINT(readability-function-cognitive-complexity)
+BOOST_DECIMAL_GPU_ENABLED constexpr auto fenv_round(T& val, bool is_neg = false) noexcept -> int // NOLINT(readability-function-cognitive-complexity)
 {
     using significand_type = std::conditional_t<std::is_same<TargetType, decimal128>::value || std::is_same<TargetType, decimal128_fast>::value, detail::uint128, int>;
 
diff --git a/include/boost/decimal/detail/integer_search_trees.hpp b/include/boost/decimal/detail/integer_search_trees.hpp
index 345c347aa..4fc3079de 100644
--- a/include/boost/decimal/detail/integer_search_trees.hpp
+++ b/include/boost/decimal/detail/integer_search_trees.hpp
@@ -24,7 +24,7 @@ namespace detail {
 
 // Generic solution
 template <typename T>
-constexpr auto num_digits(T x) noexcept -> int
+BOOST_DECIMAL_GPU_ENABLED constexpr auto num_digits(T x) noexcept -> int
 {
     int digits = 0;
 
@@ -38,7 +38,7 @@ constexpr auto num_digits(T x) noexcept -> int
 }
 
 template <>
-constexpr auto num_digits(std::uint32_t x) noexcept -> int
+BOOST_DECIMAL_GPU_ENABLED constexpr auto num_digits(std::uint32_t x) noexcept -> int
 {
     if (x >= UINT32_C(10000))
     {
@@ -82,7 +82,7 @@ constexpr auto num_digits(std::uint32_t x) noexcept -> int
 }
 
 template <>
-constexpr auto num_digits(std::uint64_t x) noexcept -> int
+BOOST_DECIMAL_GPU_ENABLED constexpr auto num_digits(std::uint64_t x) noexcept -> int
 {
     if (x >= UINT64_C(10000000000))
     {
@@ -168,7 +168,7 @@ constexpr auto num_digits(std::uint64_t x) noexcept -> int
 # pragma warning(disable: 4307) // MSVC 14.1 warns of intergral constant overflow
 #endif
 
-constexpr int num_digits(const uint128& x) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr int num_digits(const uint128& x) noexcept
 {
     if (x.high == UINT64_C(0))
     {
@@ -183,7 +183,7 @@ constexpr int num_digits(const uint128& x) noexcept
     {
         std::uint32_t mid = (left + right + 1U) / 2U;
 
-        if (x >= impl::emulated_128_pow10[mid])
+        if (x >= pow10<uint128>(mid))
         {
             left = mid;
         }
@@ -196,7 +196,7 @@ constexpr int num_digits(const uint128& x) noexcept
     return static_cast<int>(left + 1);
 }
 
-constexpr int num_digits(const uint256_t& x) noexcept
+BOOST_DECIMAL_GPU_ENABLED constexpr int num_digits(const uint256_t& x) noexcept
 {
     if (x.high == 0)
     {
@@ -204,7 +204,7 @@ constexpr int num_digits(const uint256_t& x) noexcept
     }
 
     // 10^77
-    auto current_power_of_10 {uint256_t{uint128{UINT64_C(15930919111324522770), UINT64_C(5327493063679123134)}, uint128{UINT64_C(12292710897160462336), UINT64_C(0)}}};
+    uint256_t current_power_of_10 {uint128{UINT64_C(15930919111324522770), UINT64_C(5327493063679123134)}, uint128{UINT64_C(12292710897160462336), UINT64_C(0)}};
 
     for (int i = 78; i > 0; --i)
     {
@@ -225,7 +225,7 @@ constexpr int num_digits(const uint256_t& x) noexcept
 
 #ifdef BOOST_DECIMAL_HAS_INT128
 
-constexpr auto num_digits(const uint128_t& x) noexcept -> int
+BOOST_DECIMAL_GPU_ENABLED constexpr auto num_digits(const uint128_t& x) noexcept -> int
 {
     if (static_cast<std::uint64_t>(x >> 64) == UINT64_C(0))
     {
@@ -240,7 +240,7 @@ constexpr auto num_digits(const uint128_t& x) noexcept -> int
     {
         std::uint32_t mid = (left + right + 1U) / 2U;
 
-        if (x >= impl::emulated_128_pow10[mid])
+        if (x >= pow10<uint128_t>(mid))
         {
             left = mid;
         }
diff --git a/include/boost/decimal/detail/mul_impl.hpp b/include/boost/decimal/detail/mul_impl.hpp
index 6855223e5..aebe18702 100644
--- a/include/boost/decimal/detail/mul_impl.hpp
+++ b/include/boost/decimal/detail/mul_impl.hpp
@@ -5,6 +5,8 @@
 #ifndef BOOST_DECIMAL_DETAIL_MUL_IMPL_HPP
 #define BOOST_DECIMAL_DETAIL_MUL_IMPL_HPP
 
+#include <boost/decimal/detail/config.hpp>
+#include <boost/decimal/detail/type_traits.hpp>
 #include <boost/decimal/detail/attributes.hpp>
 #include <boost/decimal/detail/apply_sign.hpp>
 #include <boost/decimal/detail/fenv_rounding.hpp>
@@ -25,8 +27,10 @@ namespace detail {
 // 2) Returns a struct of the constituent components (used with FMAs)
 
 template <typename ReturnType, typename T, typename U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                   T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> std::enable_if_t<std::is_same<ReturnType, decimal32_fast>::value, ReturnType>
+BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_FORCE_INLINE 
+constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                        T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
+-> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::is_same<ReturnType, decimal32_fast>::value, ReturnType>
 {
     using mul_type = std::uint_fast64_t;
 
@@ -37,8 +41,10 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lh
 }
 
 template <typename ReturnType, typename T, typename U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                   T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> std::enable_if_t<std::is_same<ReturnType, decimal32>::value, ReturnType>
+BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_FORCE_INLINE constexpr auto
+mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+         T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
+-> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::is_same<ReturnType, decimal32>::value, ReturnType>
 {
     using mul_type = std::uint_fast64_t;
 
@@ -55,8 +61,10 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lh
 }
 
 template <typename ReturnType, typename T, typename U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                   T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> std::enable_if_t<!detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
+BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_FORCE_INLINE 
+constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                        T rhs_sig, U rhs_exp, bool rhs_sign) noexcept 
+-> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<!detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
 {
     using mul_type = std::uint_fast64_t;
 
@@ -96,9 +104,10 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lh
 }
 
 template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T, BOOST_DECIMAL_INTEGRAL U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                       T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
-                                                       -> std::enable_if_t<detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
+BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_FORCE_INLINE
+constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                            T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
+-> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
 {
     // Clang 6-12 yields incorrect results with builtin u128, so we force usage of our version
     #if defined(BOOST_DECIMAL_HAS_INT128) && (!defined(__clang_major__) || (__clang_major__) > 12)
@@ -123,16 +132,17 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, boo
 }
 
 template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T, BOOST_DECIMAL_INTEGRAL U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                       T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
-                                                       -> std::enable_if_t<!detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
+BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_FORCE_INLINE 
+constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                            T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
+-> BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<!detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
 {
     #if defined(BOOST_DECIMAL_HAS_INT128) && (!defined(__clang_major__) || __clang_major__ > 13)
     using unsigned_int128_type = boost::decimal::detail::uint128_t;
-    constexpr auto comp_value {impl::builtin_128_pow10[31]};
+    constexpr auto comp_value {pow10<unsigned_int128_type>(31)};
     #else
     using unsigned_int128_type = boost::decimal::detail::uint128;
-    constexpr auto comp_value {impl::emulated_128_pow10[31]};
+    constexpr auto comp_value {pow10<unsigned_int128_type>(31)};
     #endif
 
     #ifdef BOOST_DECIMAL_DEBUG
@@ -166,6 +176,7 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, boo
 }
 
 template <typename ReturnType, typename T1, typename T2>
+BOOST_DECIMAL_GPU_ENABLED
 constexpr auto d128_mul_impl(T1 lhs_sig, std::int32_t lhs_exp, bool lhs_sign,
                              T2 rhs_sig, std::int32_t rhs_exp, bool rhs_sign) noexcept -> ReturnType
 {
@@ -191,6 +202,7 @@ constexpr auto d128_mul_impl(T1 lhs_sig, std::int32_t lhs_exp, bool lhs_sign,
 
 template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T1, BOOST_DECIMAL_INTEGRAL U1,
                                BOOST_DECIMAL_INTEGRAL T2, BOOST_DECIMAL_INTEGRAL U2>
+BOOST_DECIMAL_GPU_ENABLED
 constexpr auto d128_fast_mul_impl(T1 lhs_sig, U1 lhs_exp, bool lhs_sign,
                                   T2 rhs_sig, U2 rhs_exp, bool rhs_sign) noexcept -> ReturnType
 {
diff --git a/include/boost/decimal/detail/normalize.hpp b/include/boost/decimal/detail/normalize.hpp
index e5fce0bb3..6fafd29a6 100644
--- a/include/boost/decimal/detail/normalize.hpp
+++ b/include/boost/decimal/detail/normalize.hpp
@@ -6,6 +6,7 @@
 #define BOOST_DECIMAL_DETAIL_NORMALIZE_HPP
 
 #include <boost/decimal/fwd.hpp>
+#include <boost/decimal/detail/config.hpp>
 #include <boost/decimal/detail/integer_search_trees.hpp>
 #include <boost/decimal/detail/fenv_rounding.hpp>
 #include <boost/decimal/detail/attributes.hpp>
@@ -17,7 +18,7 @@ namespace detail {
 
 // Converts the significand to full precision to remove the effects of cohorts
 template <typename TargetDecimalType = decimal32, typename T1, typename T2>
-constexpr auto normalize(T1& significand, T2& exp, bool sign = false) noexcept -> void
+BOOST_DECIMAL_GPU_ENABLED constexpr auto normalize(T1& significand, T2& exp, bool sign = false) noexcept -> void
 {
     constexpr auto target_precision {detail::precision_v<TargetDecimalType>};
     const auto digits {num_digits(significand)};
diff --git a/include/boost/decimal/detail/numeric_limits.hpp b/include/boost/decimal/detail/numeric_limits.hpp
new file mode 100644
index 000000000..9e039677d
--- /dev/null
+++ b/include/boost/decimal/detail/numeric_limits.hpp
@@ -0,0 +1,467 @@
+//  Copyright (c) 2024 Matt Borland
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The std::numeric_limits member functions cannot be called from device
+//  code on GPU platforms like CUDA because they lack the __device__ marker,
+//  and libcu++ does not provide something analogous.
+//  Rather than using giant if/else blocks, we provide our own version of numeric_limits.
+
+#ifndef BOOST_DECIMAL_TOOLS_NUMERIC_LIMITS_HPP
+#define BOOST_DECIMAL_TOOLS_NUMERIC_LIMITS_HPP
+
+#include <boost/decimal/detail/config.hpp>
+#include <type_traits>
+#include <limits>
+#include <climits>
+#include <cfloat>
+
+namespace boost {
+namespace decimal {
+namespace detail {
+
+template <typename T>
+struct numeric_limits : public std::numeric_limits<T> {};
+
+#ifdef BOOST_DECIMAL_HAS_GPU_SUPPORT
+
+template <>
+struct numeric_limits<float> // Specialization that follows the BOOST_DECIMAL_HAS_GPU_SUPPORT guard; its functions carry BOOST_DECIMAL_GPU_ENABLED
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<float>::is_specialized; // Every constant is forwarded from std::numeric_limits<float>
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<float>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<float>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<float>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<float>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<float>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<float>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<float>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<float>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<float>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<float>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<float>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<float>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<float>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<float>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<float>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<float>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<float>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<float>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<float>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<float>::tinyness_before;
+
+    // Member functions: built from C macros (FLT_*, INFINITY, NAN) rather than by calling std::numeric_limits<float>
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float (min)         () { return FLT_MIN; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float (max)         () { return FLT_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float lowest        () { return -FLT_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float epsilon       () { return FLT_EPSILON; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float round_error   () { return 0.5F; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float infinity      () { return static_cast<float>(INFINITY); } // NOTE(review): INFINITY and NAN are <cmath> macros; <cmath> is not included directly here - confirm config.hpp provides it
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float quiet_NaN     () { return static_cast<float>(NAN); }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float signaling_NaN ()
+    {
+        #ifdef FLT_SNAN
+        return FLT_SNAN;
+        #else
+        return static_cast<float>(NAN); // Fallback when FLT_SNAN is not defined: NAN is a quiet NaN, so no signaling value is produced
+        #endif
+    }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr float denorm_min    () { return FLT_TRUE_MIN; }
+};
+
+template <>
+struct numeric_limits<double> // Specialization whose member functions carry BOOST_DECIMAL_GPU_ENABLED
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<double>::is_specialized; // Every constant is forwarded from std::numeric_limits<double>
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<double>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<double>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<double>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<double>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<double>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<double>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<double>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<double>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<double>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<double>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<double>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<double>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<double>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<double>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<double>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<double>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<double>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<double>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<double>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<double>::tinyness_before;
+
+    // Member functions: built from C macros (DBL_*, INFINITY, NAN) rather than by calling std::numeric_limits<double>
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double (min)         () { return DBL_MIN; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double (max)         () { return DBL_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double lowest        () { return -DBL_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double epsilon       () { return DBL_EPSILON; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double round_error   () { return 0.5; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double infinity      () { return static_cast<double>(INFINITY); } // NOTE(review): INFINITY and NAN are <cmath> macros; <cmath> is not included directly here - confirm config.hpp provides it
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double quiet_NaN     () { return static_cast<double>(NAN); }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double signaling_NaN ()
+    {
+        #ifdef DBL_SNAN
+        return DBL_SNAN;
+        #else
+        return static_cast<double>(NAN); // Fallback when DBL_SNAN is not defined: NAN is a quiet NaN, so no signaling value is produced
+        #endif
+    }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr double denorm_min    () { return DBL_TRUE_MIN; }
+};
+
+template <>
+struct numeric_limits<short> // Specialization whose member functions carry BOOST_DECIMAL_GPU_ENABLED
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<short>::is_specialized; // Every constant is forwarded from std::numeric_limits<short>
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<short>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<short>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<short>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<short>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<short>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<short>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<short>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<short>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<short>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<short>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<short>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<short>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<short>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<short>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<short>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<short>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<short>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<short>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<short>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<short>::tinyness_before;
+
+    // Member functions: range limits come from the <climits> SHRT_* macros; the remaining queries return 0
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short (min)         () { return SHRT_MIN; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short (max)         () { return SHRT_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short lowest        () { return SHRT_MIN; } // Same value as (min)()
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short epsilon       () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short round_error   () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short infinity      () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short quiet_NaN     () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short signaling_NaN () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr short denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned short> // Trait constants are forwarded from std::numeric_limits<unsigned short>; member functions are defined locally
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned short>::is_specialized;
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<unsigned short>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<unsigned short>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<unsigned short>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned short>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned short>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned short>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned short>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned short>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned short>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned short>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<unsigned short>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<unsigned short>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned short>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<unsigned short>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<unsigned short>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned short>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<unsigned short>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned short>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<unsigned short>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned short>::tinyness_before;
+
+    // Member Functions: tagged BOOST_DECIMAL_GPU_ENABLED; they return C limit macros or literal zeros rather than calling std
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short (min)         () { return 0; } // (min)/(max) are parenthesized, presumably to sidestep function-like min/max macros
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short (max)         () { return USHRT_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short lowest        () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short epsilon       () { return 0; } // epsilon..denorm_min: 0, the value std::numeric_limits also reports for integer types
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short round_error   () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short infinity      () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short quiet_NaN     () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short signaling_NaN () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned short denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<int> // Trait constants are forwarded from std::numeric_limits<int>; member functions are defined locally
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<int>::is_specialized;
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<int>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<int>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<int>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<int>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<int>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<int>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<int>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<int>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<int>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<int>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<int>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<int>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<int>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<int>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<int>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<int>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<int>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<int>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<int>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<int>::tinyness_before;
+
+    // Member Functions: tagged BOOST_DECIMAL_GPU_ENABLED; they return C limit macros or literal zeros rather than calling std
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int (min)         () { return INT_MIN; } // (min)/(max) are parenthesized, presumably to sidestep function-like min/max macros
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int (max)         () { return INT_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int lowest        () { return INT_MIN; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int epsilon       () { return 0; } // epsilon..denorm_min: 0, the value std::numeric_limits also reports for integer types
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int round_error   () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int infinity      () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int quiet_NaN     () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int signaling_NaN () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr int denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned int> // Trait constants are forwarded from std::numeric_limits<unsigned int>; member functions are defined locally
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned int>::is_specialized;
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<unsigned int>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<unsigned int>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<unsigned int>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned int>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned int>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned int>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned int>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned int>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned int>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned int>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<unsigned int>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<unsigned int>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned int>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<unsigned int>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<unsigned int>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned int>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<unsigned int>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned int>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<unsigned int>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned int>::tinyness_before;
+
+    // Member Functions: tagged BOOST_DECIMAL_GPU_ENABLED; they return C limit macros or literal zeros rather than calling std
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int (min)         () { return 0; } // (min)/(max) are parenthesized, presumably to sidestep function-like min/max macros
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int (max)         () { return UINT_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int lowest        () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int epsilon       () { return 0; } // epsilon..denorm_min: 0, the value std::numeric_limits also reports for integer types
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int round_error   () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int infinity      () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int quiet_NaN     () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int signaling_NaN () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned int denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<long> // Trait constants are forwarded from std::numeric_limits<long>; member functions are defined locally
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<long>::is_specialized;
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<long>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<long>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<long>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<long>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<long>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<long>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<long>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<long>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<long>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<long>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<long>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<long>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<long>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<long>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<long>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<long>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<long>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<long>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<long>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<long>::tinyness_before;
+
+    // Member Functions: tagged BOOST_DECIMAL_GPU_ENABLED; they return C limit macros or literal zeros rather than calling std
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long (min)         () { return LONG_MIN; } // (min)/(max) are parenthesized, presumably to sidestep function-like min/max macros
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long (max)         () { return LONG_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long lowest        () { return LONG_MIN; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long epsilon       () { return 0; } // epsilon..denorm_min: 0, the value std::numeric_limits also reports for integer types
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long round_error   () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long infinity      () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long quiet_NaN     () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long signaling_NaN () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long> // Trait constants are forwarded from std::numeric_limits<unsigned long>; member functions are defined locally
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned long>::is_specialized;
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<unsigned long>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<unsigned long>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<unsigned long>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned long>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned long>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned long>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned long>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned long>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned long>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned long>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<unsigned long>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<unsigned long>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned long>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<unsigned long>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<unsigned long>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned long>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<unsigned long>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned long>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<unsigned long>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned long>::tinyness_before;
+
+    // Member Functions: tagged BOOST_DECIMAL_GPU_ENABLED; they return C limit macros or literal zeros rather than calling std
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long (min)         () { return 0; } // (min)/(max) are parenthesized, presumably to sidestep function-like min/max macros
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long (max)         () { return ULONG_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long lowest        () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long epsilon       () { return 0; } // epsilon..denorm_min: 0, the value std::numeric_limits also reports for integer types
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long round_error   () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long infinity      () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long quiet_NaN     () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long signaling_NaN () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<long long> // Trait constants are forwarded from std::numeric_limits<long long>; member functions are defined locally
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<long long>::is_specialized;
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<long long>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<long long>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<long long>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<long long>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<long long>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<long long>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<long long>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<long long>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<long long>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<long long>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<long long>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<long long>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<long long>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<long long>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<long long>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<long long>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<long long>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<long long>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<long long>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<long long>::tinyness_before;
+
+    // Member Functions: tagged BOOST_DECIMAL_GPU_ENABLED; they return C limit macros or literal zeros rather than calling std
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long (min)         () { return LLONG_MIN; } // (min)/(max) are parenthesized, presumably to sidestep function-like min/max macros
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long (max)         () { return LLONG_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long lowest        () { return LLONG_MIN; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long epsilon       () { return 0; } // epsilon..denorm_min: 0, the value std::numeric_limits also reports for integer types
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long round_error   () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long infinity      () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long quiet_NaN     () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long signaling_NaN () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr long long denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long long> // Trait constants are forwarded from std::numeric_limits<unsigned long long>; member functions are defined locally
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned long long>::is_specialized;
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<unsigned long long>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<unsigned long long>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<unsigned long long>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned long long>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned long long>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned long long>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned long long>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned long long>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned long long>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned long long>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<unsigned long long>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<unsigned long long>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned long long>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<unsigned long long>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<unsigned long long>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned long long>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<unsigned long long>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned long long>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<unsigned long long>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned long long>::tinyness_before;
+
+    // Member Functions: tagged BOOST_DECIMAL_GPU_ENABLED; they return C limit macros or literal zeros rather than calling std
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long (min)         () { return 0; } // (min)/(max) are parenthesized, presumably to sidestep function-like min/max macros
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long (max)         () { return ULLONG_MAX; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long lowest        () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long epsilon       () { return 0; } // epsilon..denorm_min: 0, the value std::numeric_limits also reports for integer types
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long round_error   () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long infinity      () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long quiet_NaN     () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long signaling_NaN () { return 0; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr unsigned long long denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<bool> // Trait constants are forwarded from std::numeric_limits<bool>; member functions are defined locally
+{
+    BOOST_DECIMAL_STATIC constexpr bool is_specialized = std::numeric_limits<bool>::is_specialized;
+    BOOST_DECIMAL_STATIC constexpr bool is_signed = std::numeric_limits<bool>::is_signed;
+    BOOST_DECIMAL_STATIC constexpr bool is_integer = std::numeric_limits<bool>::is_integer;
+    BOOST_DECIMAL_STATIC constexpr bool is_exact = std::numeric_limits<bool>::is_exact;
+    BOOST_DECIMAL_STATIC constexpr bool has_infinity = std::numeric_limits<bool>::has_infinity;
+    BOOST_DECIMAL_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<bool>::has_quiet_NaN;
+    BOOST_DECIMAL_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<bool>::has_signaling_NaN;
+
+    BOOST_DECIMAL_STATIC constexpr std::float_round_style round_style = std::numeric_limits<bool>::round_style;
+    BOOST_DECIMAL_STATIC constexpr bool is_iec559 = std::numeric_limits<bool>::is_iec559;
+    BOOST_DECIMAL_STATIC constexpr bool is_bounded = std::numeric_limits<bool>::is_bounded;
+    BOOST_DECIMAL_STATIC constexpr bool is_modulo = std::numeric_limits<bool>::is_modulo;
+    BOOST_DECIMAL_STATIC constexpr int digits = std::numeric_limits<bool>::digits;
+    BOOST_DECIMAL_STATIC constexpr int digits10 = std::numeric_limits<bool>::digits10;
+    BOOST_DECIMAL_STATIC constexpr int max_digits10 = std::numeric_limits<bool>::max_digits10;
+    BOOST_DECIMAL_STATIC constexpr int radix = std::numeric_limits<bool>::radix;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent = std::numeric_limits<bool>::min_exponent;
+    BOOST_DECIMAL_STATIC constexpr int min_exponent10 = std::numeric_limits<bool>::min_exponent10;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent = std::numeric_limits<bool>::max_exponent;
+    BOOST_DECIMAL_STATIC constexpr int max_exponent10 = std::numeric_limits<bool>::max_exponent10;
+    BOOST_DECIMAL_STATIC constexpr bool traps = std::numeric_limits<bool>::traps;
+    BOOST_DECIMAL_STATIC constexpr bool tinyness_before = std::numeric_limits<bool>::tinyness_before;
+
+    // Member Functions: tagged BOOST_DECIMAL_GPU_ENABLED; they return literal true/false rather than calling std
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool (min)         () { return false; } // (min)/(max) are parenthesized, presumably to sidestep function-like min/max macros
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool (max)         () { return true; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool lowest        () { return false; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool epsilon       () { return false; } // epsilon..denorm_min: false, the value std::numeric_limits<bool> also reports
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool round_error   () { return false; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool infinity      () { return false; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool quiet_NaN     () { return false; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool signaling_NaN () { return false; }
+    BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_STATIC constexpr bool denorm_min    () { return false; }
+};
+
+#endif // BOOST_DECIMAL_HAS_GPU_SUPPORT
+
+} // namespace detail
+} // namespace decimal
+} // namespace boost
+
+#endif
diff --git a/include/boost/decimal/detail/power_tables.hpp b/include/boost/decimal/detail/power_tables.hpp
index 24cae2b2e..d892826e5 100644
--- a/include/boost/decimal/detail/power_tables.hpp
+++ b/include/boost/decimal/detail/power_tables.hpp
@@ -14,6 +14,8 @@
 #include <cstdint>
 #endif
 
+#ifndef BOOST_DECIMAL_ENABLE_CUDA
+
 namespace boost {
 namespace decimal {
 namespace detail {
@@ -210,7 +212,7 @@ static_assert(sizeof(emulated_256_pow10) == sizeof(boost::decimal::detail::uint2
 } // namespace impl
 
 template <typename T>
-constexpr auto pow10(T n) noexcept -> T
+BOOST_DECIMAL_FORCE_INLINE constexpr auto pow10(T n) noexcept -> T // returns impl::powers_of_10[n] cast to T; n is not range-checked
 {
     return static_cast<T>(impl::powers_of_10[static_cast<std::size_t>(n)]);
 }
@@ -221,12 +223,12 @@ constexpr auto pow10(T n) noexcept -> T
 #endif
 
 template <>
-constexpr auto pow10(detail::uint128 n) noexcept -> detail::uint128
+BOOST_DECIMAL_FORCE_INLINE constexpr auto pow10(detail::uint128 n) noexcept -> detail::uint128 // indexes impl::emulated_128_pow10 using n.low only
 {
     return impl::emulated_128_pow10[static_cast<std::size_t>(n.low)];
 }
 
-constexpr auto pow10(const uint256_t& n) noexcept -> uint256_t
+BOOST_DECIMAL_FORCE_INLINE constexpr auto pow10(const uint256_t& n) noexcept -> uint256_t // indexes impl::emulated_256_pow10 using n.low.low only
 {
     return impl::emulated_256_pow10[static_cast<std::size_t>(n.low.low)];
 }
@@ -234,7 +236,7 @@ constexpr auto pow10(const uint256_t& n) noexcept -> uint256_t
 #ifdef BOOST_DECIMAL_HAS_INT128
 
 template <>
-constexpr auto pow10(detail::uint128_t n) noexcept -> detail::uint128_t
+BOOST_DECIMAL_FORCE_INLINE constexpr auto pow10(detail::uint128_t n) noexcept -> detail::uint128_t // indexes impl::builtin_128_pow10 with n; no range check
 {
     return impl::builtin_128_pow10[static_cast<std::size_t>(n)];
 }
@@ -249,4 +251,219 @@ constexpr auto pow10(detail::uint128_t n) noexcept -> detail::uint128_t
 } // namespace decimal
 } // namespace boost
 
+#else // CUDA Handling
+
+namespace boost {
+namespace decimal {
+namespace detail {
+
+template <typename T> // Generic CUDA-build pow10: looks up 10^n and returns it converted to T
+BOOST_DECIMAL_GPU_ENABLED constexpr auto pow10(T n) noexcept -> T
+{
+    constexpr std::uint64_t powers_of_10[20] = // function-local table holding 10^0 .. 10^19
+    {
+        UINT64_C(1), UINT64_C(10), UINT64_C(100), UINT64_C(1000), UINT64_C(10000), UINT64_C(100000), UINT64_C(1000000),
+        UINT64_C(10000000), UINT64_C(100000000), UINT64_C(1000000000), UINT64_C(10000000000), UINT64_C(100000000000),
+        UINT64_C(1000000000000), UINT64_C(10000000000000), UINT64_C(100000000000000), UINT64_C(1000000000000000),
+        UINT64_C(10000000000000000), UINT64_C(100000000000000000), UINT64_C(1000000000000000000),
+        UINT64_C(10000000000000000000)
+    };
+    // No bounds check: n must be in [0, 19]. The 64-bit table entry is converted to T by the static_cast.
+    return static_cast<T>(powers_of_10[static_cast<std::size_t>(n)]);
+}
+
+
+template <> // CUDA-build specialization for the emulated 128-bit type: returns 10^n as detail::uint128
+BOOST_DECIMAL_GPU_ENABLED constexpr auto pow10(detail::uint128 n) noexcept -> detail::uint128
+{
+    constexpr uint128 emulated_128_pow10[] = // entries are written as {high 64 bits, low 64 bits}
+    {
+        uint128 {UINT64_C(0), UINT64_C(1)},
+        uint128 {UINT64_C(0), UINT64_C(10)},
+        uint128 {UINT64_C(0), UINT64_C(100)},
+        uint128 {UINT64_C(0), UINT64_C(1000)},
+        uint128 {UINT64_C(0), UINT64_C(10000)},
+        uint128 {UINT64_C(0), UINT64_C(100000)},
+        uint128 {UINT64_C(0), UINT64_C(1000000)},
+        uint128 {UINT64_C(0), UINT64_C(10000000)},
+        uint128 {UINT64_C(0), UINT64_C(100000000)},
+        uint128 {UINT64_C(0), UINT64_C(1000000000)},
+        uint128 {UINT64_C(0), UINT64_C(10000000000)},
+        uint128 {UINT64_C(0), UINT64_C(100000000000)},
+        uint128 {UINT64_C(0), UINT64_C(1000000000000)},
+        uint128 {UINT64_C(0), UINT64_C(10000000000000)},
+        uint128 {UINT64_C(0), UINT64_C(100000000000000)},
+        uint128 {UINT64_C(0), UINT64_C(1000000000000000)},
+        uint128 {UINT64_C(0), UINT64_C(10000000000000000)},
+        uint128 {UINT64_C(0), UINT64_C(100000000000000000)},
+        uint128 {UINT64_C(0), UINT64_C(1000000000000000000)},
+        uint128 {UINT64_C(0), UINT64_C(10000000000000000000)},
+        uint128 {UINT64_C(5), UINT64_C(7766279631452241920)},
+        uint128 {UINT64_C(54), UINT64_C(3875820019684212736)},
+        uint128 {UINT64_C(542), UINT64_C(1864712049423024128)},
+        uint128 {UINT64_C(5421), UINT64_C(200376420520689664)},
+        uint128 {UINT64_C(54210), UINT64_C(2003764205206896640)},
+        uint128 {UINT64_C(542101), UINT64_C(1590897978359414784)},
+        uint128 {UINT64_C(5421010), UINT64_C(15908979783594147840)},
+        uint128 {UINT64_C(54210108), UINT64_C(11515845246265065472)},
+        uint128 {UINT64_C(542101086), UINT64_C(4477988020393345024)},
+        uint128 {UINT64_C(5421010862), UINT64_C(7886392056514347008)},
+        uint128 {UINT64_C(54210108624), UINT64_C(5076944270305263616)},
+        uint128 {UINT64_C(542101086242), UINT64_C(13875954555633532928)},
+        uint128 {UINT64_C(5421010862427), UINT64_C(9632337040368467968)},
+        uint128 {UINT64_C(54210108624275), UINT64_C(4089650035136921600)},
+        uint128 {UINT64_C(542101086242752), UINT64_C(4003012203950112768)},
+        uint128 {UINT64_C(5421010862427522), UINT64_C(3136633892082024448)},
+        uint128 {UINT64_C(54210108624275221), UINT64_C(12919594847110692864)},
+        uint128 {UINT64_C(542101086242752217), UINT64_C(68739955140067328)},
+        uint128 {UINT64_C(5421010862427522170), UINT64_C(687399551400673280)},
+        uint128 {UINT64_C(17316620476856118468), UINT64_C(6873995514006732800)}, // 10^39 exceeds 128 bits: value is reduced modulo 2^128
+    };
+    // No bounds check: only n.low is used as the index and it must be in [0, 39]; n.high is ignored.
+    return emulated_128_pow10[static_cast<std::size_t>(n.low)];
+}
+
+#ifdef BOOST_DECIMAL_HAS_INT128
+// CUDA-build specialization for the built-in 128-bit type: returns 10^n. It is guarded the same way
+// the non-CUDA branch guards this specialization, and carries BOOST_DECIMAL_GPU_ENABLED like the
+// sibling overloads in this branch. No bounds check: n must be in [0, 39].
+// The final entry (10^39) does not fit in 128 bits; the unsigned product wraps modulo 2^128.
+template <>
+BOOST_DECIMAL_GPU_ENABLED constexpr auto pow10(detail::uint128_t n) noexcept -> detail::uint128_t
+{
+    constexpr uint128_t builtin_128_pow10[] = {
+        uint128_t(1), uint128_t(10), uint128_t(100), uint128_t(1000), uint128_t(10000), uint128_t(100000), uint128_t(1000000),
+        uint128_t(10000000),
+        uint128_t(100000000),
+        uint128_t(1000000000),
+        uint128_t(10000000000),
+        uint128_t(100000000000),
+        uint128_t(1000000000000),
+        uint128_t(10000000000000),
+        uint128_t(100000000000000),
+        uint128_t(1000000000000000),
+        uint128_t(10000000000000000),
+        uint128_t(100000000000000000),
+        uint128_t(1000000000000000000),
+        uint128_t(10000000000000000000ULL),
+        uint128_t(10000000000000000000ULL) * uint128_t(10),
+        uint128_t(10000000000000000000ULL) * uint128_t(100),
+        uint128_t(10000000000000000000ULL) * uint128_t(1000),
+        uint128_t(10000000000000000000ULL) * uint128_t(10000),
+        uint128_t(10000000000000000000ULL) * uint128_t(100000),
+        uint128_t(10000000000000000000ULL) * uint128_t(1000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(10000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(100000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(1000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(10000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(100000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(1000000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(10000000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(100000000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(1000000000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(10000000000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(100000000000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(1000000000000000000),
+        uint128_t(10000000000000000000ULL) * uint128_t(10000000000000000000ULL),
+        uint128_t(10000000000000000000ULL) * uint128_t(10000000000000000000ULL) * uint128_t(10ULL),
+    };
+
+    return builtin_128_pow10[static_cast<std::size_t>(n)];
+}
+#endif // BOOST_DECIMAL_HAS_INT128
+
+BOOST_DECIMAL_GPU_ENABLED constexpr auto pow10(const uint256_t& n) noexcept -> uint256_t
+{
+    constexpr uint256_t emulated_256_pow10[] = { // entries are written as {high uint128, low uint128}, each uint128 as {high, low}
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(1)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(10)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(100)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(1000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(10000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(100000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(1000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(10000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(100000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(1000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(10000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(100000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(1000000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(10000000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(100000000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(1000000000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(10000000000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(100000000000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(1000000000000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(0), UINT64_C(10000000000000000000)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(5), UINT64_C(7766279631452241920)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(54), UINT64_C(3875820019684212736)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(542), UINT64_C(1864712049423024128)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(5421), UINT64_C(200376420520689664)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(54210), UINT64_C(2003764205206896640)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(542101), UINT64_C(1590897978359414784)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(5421010), UINT64_C(15908979783594147840)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(54210108), UINT64_C(11515845246265065472)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(542101086), UINT64_C(4477988020393345024)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(5421010862), UINT64_C(7886392056514347008)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(54210108624), UINT64_C(5076944270305263616)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(542101086242), UINT64_C(13875954555633532928)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(5421010862427), UINT64_C(9632337040368467968)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(54210108624275), UINT64_C(4089650035136921600)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(542101086242752), UINT64_C(4003012203950112768)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(5421010862427522), UINT64_C(3136633892082024448)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(54210108624275221), UINT64_C(12919594847110692864)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(542101086242752217), UINT64_C(68739955140067328)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(0)}, uint128{UINT64_C(5421010862427522170), UINT64_C(687399551400673280)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(2)}, uint128{UINT64_C(17316620476856118468), UINT64_C(6873995514006732800)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(29)}, uint128{UINT64_C(7145508105175220139), UINT64_C(13399722918938673152)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(293)}, uint128{UINT64_C(16114848830623546549), UINT64_C(4870020673419870208)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(2938)}, uint128{UINT64_C(13574535716559052564), UINT64_C(11806718586779598848)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(29387)}, uint128{UINT64_C(6618148649623664334), UINT64_C(7386721425538678784)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(293873)}, uint128{UINT64_C(10841254275107988496), UINT64_C(80237960548581376)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(2938735)}, uint128{UINT64_C(16178822382532126880), UINT64_C(802379605485813760)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(29387358)}, uint128{UINT64_C(14214271235644855872), UINT64_C(8023796054858137600)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(293873587)}, uint128{UINT64_C(13015503840481697412), UINT64_C(6450984253743169536)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(2938735877)}, uint128{UINT64_C(1027829888850112811), UINT64_C(9169610316303040512)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(29387358770)}, uint128{UINT64_C(10278298888501128114), UINT64_C(17909126868192198656)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(293873587705)}, uint128{UINT64_C(10549268516463523069), UINT64_C(13070572018536022016)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(2938735877055)}, uint128{UINT64_C(13258964796087472617), UINT64_C(1578511669393358848)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(29387358770557)}, uint128{UINT64_C(3462439444907864858), UINT64_C(15785116693933588480)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(293873587705571)}, uint128{UINT64_C(16177650375369096972), UINT64_C(10277214349659471872)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(2938735877055718)}, uint128{UINT64_C(14202551164014556797), UINT64_C(10538423128046960640)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(29387358770557187)}, uint128{UINT64_C(12898303124178706663), UINT64_C(13150510911921848320)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(293873587705571876)}, uint128{UINT64_C(18302566799529756941), UINT64_C(2377900603251621888)}},
+    uint256_t{uint128{UINT64_C(0), UINT64_C(2938735877055718769)}, uint128{UINT64_C(17004971331911604867), UINT64_C(5332261958806667264)}},
+    uint256_t{uint128{UINT64_C(1), UINT64_C(10940614696847636083)}, uint128{UINT64_C(4029016655730084128), UINT64_C(16429131440647569408)}},
+    uint256_t{uint128{UINT64_C(15), UINT64_C(17172426599928602752)}, uint128{UINT64_C(3396678409881738056), UINT64_C(16717361816799281152)}},
+    uint256_t{uint128{UINT64_C(159), UINT64_C(5703569335900062977)}, uint128{UINT64_C(15520040025107828953), UINT64_C(1152921504606846976)}},
+    uint256_t{uint128{UINT64_C(1593), UINT64_C(1695461137871974930)}, uint128{UINT64_C(7626447661401876602), UINT64_C(11529215046068469760)}},
+    uint256_t{uint128{UINT64_C(15930), UINT64_C(16954611378719749304)}, uint128{UINT64_C(2477500319180559562), UINT64_C(4611686018427387904)}},
+    uint256_t{uint128{UINT64_C(159309), UINT64_C(3525417123811528497)}, uint128{UINT64_C(6328259118096044006), UINT64_C(9223372036854775808)}},
+    uint256_t{uint128{UINT64_C(1593091), UINT64_C(16807427164405733357)}, uint128{UINT64_C(7942358959831785217), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(15930919), UINT64_C(2053574980671369030)}, uint128{UINT64_C(5636613303479645706), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(159309191), UINT64_C(2089005733004138687)}, uint128{UINT64_C(1025900813667802212), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(1593091911), UINT64_C(2443313256331835254)}, uint128{UINT64_C(10259008136678022120), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(15930919111), UINT64_C(5986388489608800929)}, uint128{UINT64_C(10356360998232463120), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(159309191113), UINT64_C(4523652674959354447)}, uint128{UINT64_C(11329889613776873120), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(1593091911132), UINT64_C(8343038602174441244)}, uint128{UINT64_C(2618431695511421504), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(15930919111324), UINT64_C(9643409726906205977)}, uint128{UINT64_C(7737572881404663424), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(159309191113245), UINT64_C(4200376900514301694)}, uint128{UINT64_C(3588752519208427776), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(1593091911132452), UINT64_C(5110280857723913709)}, uint128{UINT64_C(17440781118374726144), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(15930919111324522), UINT64_C(14209320429820033867)}, uint128{UINT64_C(8387114520361296896), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(159309191113245227), UINT64_C(12965995782233477362)}, uint128{UINT64_C(10084168908774762496), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(1593091911132452277), UINT64_C(532749306367912313)}, uint128{UINT64_C(8607968719199866880), UINT64_C(0)}},
+    uint256_t{uint128{UINT64_C(15930919111324522770), UINT64_C(5327493063679123134)}, uint128{UINT64_C(12292710897160462336), UINT64_C(0)}},
+    };
+    // The table holds 78 entries, 10^0 .. 10^77. No bounds check is performed: only the lowest
+    // 64-bit word of n (n.low.low) is used as the index, so it must be in [0, 77].
+    return emulated_256_pow10[static_cast<std::size_t>(n.low.low)];
+}
+
+
+} // namespace detail
+} // namespace decimal
+} // namespace boost
+
+#endif
+
 #endif // BOOST_DECIMAL_DETAIL_POWER_TABLES_HPP
diff --git a/include/boost/decimal/detail/promotion.hpp b/include/boost/decimal/detail/promotion.hpp
index 9e0063b2b..a0bdf27dd 100644
--- a/include/boost/decimal/detail/promotion.hpp
+++ b/include/boost/decimal/detail/promotion.hpp
@@ -6,6 +6,7 @@
 #define BOOST_DECIMAL_DETAIL_PROMOTION_HPP
 
 #include <boost/decimal/fwd.hpp>
+#include <boost/decimal/detail/config.hpp>
 #include <boost/decimal/detail/type_traits.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
@@ -73,7 +74,7 @@ constexpr int decimal_val_v = decimal_val<T>::value;
 template<typename T>
 struct promote_arg
 {
-    using type = std::conditional_t<detail::is_integral_v<T>, double, T>;
+    using type = BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::conditional_t<detail::is_integral_v<T>, double, T>;
 };
 
 template<typename T>
@@ -86,11 +87,11 @@ using promote_arg_t = typename promote_arg<T>::type;
 template<typename T1, typename T2>
 struct promote_2_args
 {
-    using type = std::conditional_t<(is_decimal_floating_point_v<T1> && is_decimal_floating_point_v<T2>),
-                 std::conditional_t<(decimal_val_v<T1> > decimal_val_v<T2>), T1, T2>,
-                 std::conditional_t<is_decimal_floating_point_v<T1>, T1,
-                        std::conditional_t<is_decimal_floating_point_v<T2>, T2,
-                                std::conditional_t<(sizeof(promote_arg_t<T1>) > sizeof(promote_arg_t<T2>)),
+    using type = BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::conditional_t<(is_decimal_floating_point_v<T1> && is_decimal_floating_point_v<T2>),
+                 BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::conditional_t<(decimal_val_v<T1> > decimal_val_v<T2>), T1, T2>,
+                 BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::conditional_t<is_decimal_floating_point_v<T1>, T1,
+                        BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::conditional_t<is_decimal_floating_point_v<T2>, T2,
+                                BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::conditional_t<(sizeof(promote_arg_t<T1>) > sizeof(promote_arg_t<T2>)),
                                                     promote_arg_t<T1>, promote_arg_t<T2>>>>>;
 };
 
diff --git a/include/boost/decimal/detail/shrink_significand.hpp b/include/boost/decimal/detail/shrink_significand.hpp
index 707d47c7b..ba2aba5ed 100644
--- a/include/boost/decimal/detail/shrink_significand.hpp
+++ b/include/boost/decimal/detail/shrink_significand.hpp
@@ -5,25 +5,22 @@
 #ifndef BOOST_DECIMAL_DETAIL_SHRINK_SIGNIFICAND_HPP
 #define BOOST_DECIMAL_DETAIL_SHRINK_SIGNIFICAND_HPP
 
+#include <boost/decimal/detail/config.hpp>
 #include <boost/decimal/detail/type_traits.hpp>
 #include <boost/decimal/detail/apply_sign.hpp>
 #include <boost/decimal/detail/integer_search_trees.hpp>
 #include <boost/decimal/detail/power_tables.hpp>
-
-#ifndef BOOST_DECIMAL_BUILD_MODULE
-#include <limits>
-#include <type_traits>
-#endif
+#include <boost/decimal/detail/numeric_limits.hpp>
 
 namespace boost {
 namespace decimal {
 namespace detail {
 
 template <typename TargetType = std::uint32_t, typename Integer, typename Exp>
-constexpr auto shrink_significand(Integer sig, Exp& exp) noexcept -> TargetType
+BOOST_DECIMAL_GPU_ENABLED constexpr auto shrink_significand(Integer sig, Exp& exp) noexcept -> TargetType
 {
     using Unsigned_Integer = make_unsigned_t<Integer>;
-    constexpr auto max_digits {std::numeric_limits<TargetType>::digits10};
+    constexpr auto max_digits {boost::decimal::detail::numeric_limits<TargetType>::digits10};
 
     auto unsigned_sig {make_positive_unsigned(sig)};
     const auto sig_dig {num_digits(unsigned_sig)};
diff --git a/include/boost/decimal/detail/sub_impl.hpp b/include/boost/decimal/detail/sub_impl.hpp
index 39ea7be45..f37364d7b 100644
--- a/include/boost/decimal/detail/sub_impl.hpp
+++ b/include/boost/decimal/detail/sub_impl.hpp
@@ -18,9 +18,10 @@ namespace decimal {
 namespace detail {
 
 template <typename ReturnType, typename T, typename U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto d32_sub_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                       T rhs_sig, U rhs_exp, bool rhs_sign,
-                                                       bool abs_lhs_bigger) noexcept -> ReturnType
+BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_FORCE_INLINE 
+constexpr auto d32_sub_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                            T rhs_sig, U rhs_exp, bool rhs_sign,
+                            bool abs_lhs_bigger) noexcept -> ReturnType
 {
     using sub_type = std::int_fast32_t;
 
@@ -84,9 +85,9 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto d32_sub_impl(T lhs_sig, U lhs_exp, boo
 }
 
 template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T, BOOST_DECIMAL_INTEGRAL U>
-constexpr auto d64_sub_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                            T rhs_sig, U rhs_exp, bool rhs_sign,
-                            bool abs_lhs_bigger) noexcept -> ReturnType
+BOOST_DECIMAL_GPU_ENABLED constexpr auto d64_sub_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                                                      T rhs_sig, U rhs_exp, bool rhs_sign,
+                                                      bool abs_lhs_bigger) noexcept -> ReturnType
 {
     using sub_type = std::int_fast64_t;
 
@@ -150,9 +151,9 @@ constexpr auto d64_sub_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
 }
 
 template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T, BOOST_DECIMAL_INTEGRAL U>
-constexpr auto d128_sub_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                             T rhs_sig, U rhs_exp, bool rhs_sign,
-                             bool abs_lhs_bigger) noexcept -> ReturnType
+BOOST_DECIMAL_GPU_ENABLED constexpr auto d128_sub_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
+                                                       T rhs_sig, U rhs_exp, bool rhs_sign,
+                                                       bool abs_lhs_bigger) noexcept -> ReturnType
 {
     #if defined(BOOST_DECIMAL_HAS_INT128) && (!defined(__clang_major__) || __clang_major__ > 13)
     using sub_type = detail::int128_t;
diff --git a/include/boost/decimal/detail/tuple.hpp b/include/boost/decimal/detail/tuple.hpp
new file mode 100644
index 000000000..2192ddd82
--- /dev/null
+++ b/include/boost/decimal/detail/tuple.hpp
@@ -0,0 +1,61 @@
+// Copyright 2024 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#ifndef BOOST_DECIMAL_DETAIL_TUPLE_HPP
+#define BOOST_DECIMAL_DETAIL_TUPLE_HPP
+
+#include <boost/decimal/detail/config.hpp>
+
+#ifdef BOOST_DECIMAL_ENABLE_CUDA
+
+#include <thrust/pair.h>
+#include <thrust/tuple.h>
+
+namespace boost { 
+namespace decimal {
+
+using thrust::pair;
+using thrust::tuple;
+
+using thrust::make_pair;
+using thrust::make_tuple;
+
+using thrust::tie;
+using thrust::get;
+
+using thrust::tuple_size;
+using thrust::tuple_element;
+
+} // namespace decimal
+} // namespace boost
+
+#else
+
+#include <tuple>
+
+namespace boost { 
+namespace decimal {
+
+using ::std::tuple;
+using ::std::pair;
+
+// [6.1.3.2] Tuple creation functions
+using ::std::ignore;
+using ::std::make_tuple;
+using ::std::tie;
+using ::std::get;
+
+// [6.1.3.3] Tuple helper classes
+using ::std::tuple_size;
+using ::std::tuple_element;
+
+// Pair helpers
+using ::std::make_pair;
+
+} // namespace decimal
+} // namespace boost
+
+#endif // BOOST_DECIMAL_ENABLE_CUDA
+
+#endif // BOOST_DECIMAL_DETAIL_TUPLE_HPP
diff --git a/include/boost/decimal/detail/type_traits.hpp b/include/boost/decimal/detail/type_traits.hpp
index 5b7b841ec..cfde2052e 100644
--- a/include/boost/decimal/detail/type_traits.hpp
+++ b/include/boost/decimal/detail/type_traits.hpp
@@ -10,16 +10,20 @@
 #include <boost/decimal/detail/config.hpp>
 #include <boost/decimal/detail/emulated128.hpp>
 
-#ifndef BOOST_DECIMAL_BUILD_MODULE
+#if !defined(BOOST_DECIMAL_BUILD_MODULE) && !defined(BOOST_DECIMAL_ENABLE_CUDA)
 #include <type_traits>
 #endif
 
+#ifdef BOOST_DECIMAL_ENABLE_CUDA
+#include <cuda/std/type_traits>
+#endif
+
 namespace boost {
 namespace decimal {
 namespace detail {
 
 template <typename T>
-struct is_signed { static constexpr bool value = std::is_signed<T>::value; };
+struct is_signed { static constexpr bool value = BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::is_signed<T>::value; };
 
 template <>
 struct is_signed<uint128> { static constexpr bool value = false; };
@@ -47,7 +51,7 @@ template <typename T>
 constexpr bool is_unsigned_v = !is_signed_v<T>;
 
 template <typename T>
-struct make_unsigned { using type = std::make_unsigned_t<T>; };
+struct make_unsigned { using type = BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::make_unsigned_t<T>; };
 
 template <>
 struct make_unsigned<uint128> { using type = uint128; };
@@ -69,7 +73,7 @@ template <typename T>
 using make_unsigned_t = typename make_unsigned<T>::type;
 
 template <typename T>
-struct make_signed { using type = std::make_signed_t<T>; };
+struct make_signed { using type = BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::make_signed_t<T>; };
 
 template <>
 struct make_signed<uint128> { using type = int128; };
@@ -91,7 +95,7 @@ template <typename T>
 using make_signed_t = typename make_signed<T>::type;
 
 template <typename T>
-struct is_integral { static constexpr bool value = std::is_integral<T>::value;};
+struct is_integral { static constexpr bool value = BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::is_integral<T>::value;};
 
 template <>
 struct is_integral<uint128> { static constexpr bool value = true; };
@@ -116,7 +120,7 @@ template <typename T>
 constexpr bool is_integral_v = is_integral<T>::value;
 
 template <typename T>
-struct is_floating_point { static constexpr bool value = std::is_floating_point<T>::value; };
+struct is_floating_point { static constexpr bool value = BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::is_floating_point<T>::value; };
 
 #ifdef BOOST_DECIMAL_HAS_FLOAT128
 template <>
diff --git a/include/boost/decimal/detail/wide-integer/uintwide_t.hpp b/include/boost/decimal/detail/wide-integer/uintwide_t.hpp
index 741ec7861..a66bc5127 100644
--- a/include/boost/decimal/detail/wide-integer/uintwide_t.hpp
+++ b/include/boost/decimal/detail/wide-integer/uintwide_t.hpp
@@ -19,6 +19,8 @@
 #define BOOST_DECIMAL_WIDE_INTEGER_NAMESPACE boost::decimal
 
 #include <boost/decimal/detail/config.hpp>
+#include <boost/decimal/detail/numeric_limits.hpp>
+#include <boost/decimal/detail/type_traits.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
 #include <cinttypes>
@@ -163,48 +165,56 @@ class reverse_iterator : public my_iterator<typename iterator_traits<iterator_ty
   using reference         = typename iterator_traits<iterator_type>::reference;
   using iterator_category = typename iterator_traits<iterator_type>::iterator_category;
 
-  constexpr reverse_iterator() = default;
+  #ifdef BOOST_DECIMAL_ENABLE_CUDA
+  #  pragma nv_diag_suppress 20012
+  #endif
+
+  BOOST_DECIMAL_GPU_ENABLED constexpr reverse_iterator() = default;
 
-  explicit constexpr reverse_iterator(iterator_type x) : current(x) { }
+  #ifdef BOOST_DECIMAL_ENABLE_CUDA
+  #  pragma nv_diag_default 20012
+  #endif
+
+  BOOST_DECIMAL_GPU_ENABLED explicit constexpr reverse_iterator(iterator_type x) : current(x) { }
 
   template<typename other>
-  constexpr reverse_iterator(const reverse_iterator<other>& u) : current(u.current) { } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions)
+  BOOST_DECIMAL_GPU_ENABLED constexpr reverse_iterator(const reverse_iterator<other>& u) : current(u.current) { } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto base() const -> iterator_type { return current; }
+  BOOST_DECIMAL_GPU_ENABLED BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto base() const -> iterator_type { return current; }
 
-  constexpr auto operator* () const -> reference { iterator_type tmp = current; return *--tmp; }
-  constexpr auto operator->() const -> pointer   { return &(operator*()); }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator* () const -> reference { iterator_type tmp = current; return *--tmp; }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator->() const -> pointer   { return &(operator*()); }
 
-  constexpr auto operator++() -> reverse_iterator& { --current; return *this; }
-  constexpr auto operator--() -> reverse_iterator& { ++current; return *this; }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator++() -> reverse_iterator& { --current; return *this; }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator--() -> reverse_iterator& { ++current; return *this; }
 
-  constexpr auto operator++(int) -> reverse_iterator { reverse_iterator tmp = *this; --current; return tmp; }
-  constexpr auto operator--(int) -> reverse_iterator { reverse_iterator tmp = *this; ++current; return tmp; }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator++(int) -> reverse_iterator { reverse_iterator tmp = *this; --current; return tmp; }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator--(int) -> reverse_iterator { reverse_iterator tmp = *this; ++current; return tmp; }
 
-  constexpr auto operator+(typename reverse_iterator<iterator_type>::difference_type n) const -> reverse_iterator { return reverse_iterator(current - n); }
-  constexpr auto operator-(typename reverse_iterator<iterator_type>::difference_type n) const -> reverse_iterator { return reverse_iterator(current + n); }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(typename reverse_iterator<iterator_type>::difference_type n) const -> reverse_iterator { return reverse_iterator(current - n); }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(typename reverse_iterator<iterator_type>::difference_type n) const -> reverse_iterator { return reverse_iterator(current + n); }
 
-  constexpr auto operator+=(typename reverse_iterator<iterator_type>::difference_type n) -> reverse_iterator& { current -= n; return *this; }
-  constexpr auto operator-=(typename reverse_iterator<iterator_type>::difference_type n) -> reverse_iterator& { current += n; return *this; }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+=(typename reverse_iterator<iterator_type>::difference_type n) -> reverse_iterator& { current -= n; return *this; }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-=(typename reverse_iterator<iterator_type>::difference_type n) -> reverse_iterator& { current += n; return *this; }
 
-  constexpr auto operator[](typename reverse_iterator<iterator_type>::difference_type n) const -> reference { return current[-n - 1]; }
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator[](typename reverse_iterator<iterator_type>::difference_type n) const -> reference { return current[-n - 1]; }
 
 private:
   iterator_type current; // NOLINT(readability-identifier-naming)
 
-  friend constexpr auto operator< (const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current  > y.current); }
-  friend constexpr auto operator<=(const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current >= y.current); }
-  friend constexpr auto operator==(const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current == y.current); }
-  friend constexpr auto operator!=(const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current != y.current); }
-  friend constexpr auto operator>=(const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current <= y.current); }
-  friend constexpr auto operator> (const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current <  y.current); }
+  friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator< (const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current  > y.current); }
+  friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator<=(const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current >= y.current); }
+  friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator==(const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current == y.current); }
+  friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator!=(const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current != y.current); }
+  friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator>=(const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current <= y.current); }
+  friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator> (const reverse_iterator& x, const reverse_iterator& y) -> bool { return (x.current <  y.current); }
 
-  friend constexpr auto operator-(const reverse_iterator& x, const reverse_iterator& y) -> typename reverse_iterator::difference_type
+  friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator-(const reverse_iterator& x, const reverse_iterator& y) -> typename reverse_iterator::difference_type
   {
     return (y.current - x.current);
   }
 
-  friend constexpr auto operator+(typename reverse_iterator::difference_type n, const reverse_iterator& x) -> reverse_iterator
+  friend BOOST_DECIMAL_GPU_ENABLED constexpr auto operator+(typename reverse_iterator::difference_type n, const reverse_iterator& x) -> reverse_iterator
   {
     return reverse_iterator(x.current - n);
   }
@@ -215,7 +225,7 @@ class reverse_iterator : public my_iterator<typename iterator_traits<iterator_ty
 // Use a local, constexpr, unsafe implementation of the fill-function.
 template<typename DestinationIterator,
          typename ValueType>
-constexpr auto fill_unsafe(DestinationIterator first, DestinationIterator last, ValueType val) -> void
+BOOST_DECIMAL_GPU_ENABLED constexpr auto fill_unsafe(DestinationIterator first, DestinationIterator last, ValueType val) -> void
 {
   while(first != last)
   {
@@ -228,7 +238,7 @@ constexpr auto fill_unsafe(DestinationIterator first, DestinationIterator last,
 // Use a local, constexpr, unsafe implementation of the copy-function.
 template<typename InputIterator,
          typename DestinationIterator>
-constexpr auto copy_unsafe(InputIterator first, InputIterator last, DestinationIterator dest) -> DestinationIterator
+BOOST_DECIMAL_GPU_ENABLED constexpr auto copy_unsafe(InputIterator first, InputIterator last, DestinationIterator dest) -> DestinationIterator
 {
   while(first != last)
   {
@@ -268,59 +278,59 @@ class array_unsafe
 
   static constexpr size_type static_size = N;
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto begin() -> iterator { return elems; }                 // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto end  () -> iterator { return elems + N; }             // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto begin() -> iterator { return elems; }                 // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto end  () -> iterator { return elems + N; }             // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto begin() const -> const_iterator { return elems; }     // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto end  () const -> const_iterator { return elems + N; } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto begin() const -> const_iterator { return elems; }     // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto end  () const -> const_iterator { return elems + N; } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto cbegin() const -> const_iterator { return elems; }     // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto cend  () const -> const_iterator { return elems + N; } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto cbegin() const -> const_iterator { return elems; }     // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto cend  () const -> const_iterator { return elems + N; } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto rbegin() -> reverse_iterator { return reverse_iterator(elems + N); }                    // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto rend  () -> reverse_iterator { return reverse_iterator(elems); }                        // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto rbegin() -> reverse_iterator { return reverse_iterator(elems + N); }                    // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto rend  () -> reverse_iterator { return reverse_iterator(elems); }                        // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto rbegin() const -> const_reverse_iterator { return const_reverse_iterator(elems + N); }  // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto rend  () const -> const_reverse_iterator { return const_reverse_iterator(elems); }      // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto rbegin() const -> const_reverse_iterator { return const_reverse_iterator(elems + N); }  // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto rend  () const -> const_reverse_iterator { return const_reverse_iterator(elems); }      // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto crbegin() const -> const_reverse_iterator { return const_reverse_iterator(elems + N); } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto crend  () const -> const_reverse_iterator { return const_reverse_iterator(elems); }     // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto crbegin() const -> const_reverse_iterator { return const_reverse_iterator(elems + N); } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto crend  () const -> const_reverse_iterator { return const_reverse_iterator(elems); }     // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto operator[](const size_type i)       -> reference       { return elems[i]; } // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto operator[](const size_type i) const -> const_reference { return elems[i]; } // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto operator[](const size_type i)       -> reference       { return elems[i]; } // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto operator[](const size_type i) const -> const_reference { return elems[i]; } // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto at(const size_type i)       -> reference       { return elems[i]; } // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto at(const size_type i) const -> const_reference { return elems[i]; } // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto at(const size_type i)       -> reference       { return elems[i]; } // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto at(const size_type i) const -> const_reference { return elems[i]; } // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto front()       -> reference       { return elems[static_cast<size_type>(UINT8_C(0))]; }
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto front() const -> const_reference { return elems[static_cast<size_type>(UINT8_C(0))]; }
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto front()       -> reference       { return elems[static_cast<size_type>(UINT8_C(0))]; }
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto front() const -> const_reference { return elems[static_cast<size_type>(UINT8_C(0))]; }
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto back()       -> reference       { return elems[static_cast<size_type>(N - static_cast<size_type>(UINT8_C(1)))]; }
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto back() const -> const_reference { return elems[static_cast<size_type>(N - static_cast<size_type>(UINT8_C(1)))]; }
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto back()       -> reference       { return elems[static_cast<size_type>(N - static_cast<size_type>(UINT8_C(1)))]; }
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto back() const -> const_reference { return elems[static_cast<size_type>(N - static_cast<size_type>(UINT8_C(1)))]; }
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD static constexpr auto size()     -> size_type { return N; }
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD static constexpr auto empty()    -> bool      { return false; }
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD static constexpr auto max_size() -> size_type { return N; }
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD static BOOST_DECIMAL_GPU_ENABLED constexpr auto size()     -> size_type { return N; }
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD static BOOST_DECIMAL_GPU_ENABLED constexpr auto empty()    -> bool      { return false; }
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD static BOOST_DECIMAL_GPU_ENABLED constexpr auto max_size() -> size_type { return N; }
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto data() const -> const_pointer { return elems; } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto data()       -> pointer       { return elems; } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto data() const -> const_pointer { return elems; } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto data()       -> pointer       { return elems; } // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD constexpr auto c_array() -> pointer { return elems; }
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_GPU_ENABLED constexpr auto c_array() -> pointer { return elems; }
 
   template<typename T2>
-  constexpr auto operator=(const array_unsafe<T2, N>& y) -> array_unsafe&
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto operator=(const array_unsafe<T2, N>& y) -> array_unsafe& // Assigns from an array of the same length N whose element type T2 may differ; delegates to copy_unsafe.
   {
     copy_unsafe(y.begin(), y.end(), begin());
 
     return *this;
   }
 
-  constexpr auto assign(const value_type& value) -> void
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto assign(const value_type& value) -> void // Passes the whole range [elems, elems + N) and value to fill_unsafe.
   {
     fill_unsafe(elems, elems + N, value); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay,cppcoreguidelines-pro-bounds-pointer-arithmetic)
   }
 
-  constexpr auto fill(const value_type& value) -> void
+  BOOST_DECIMAL_GPU_ENABLED constexpr auto fill(const value_type& value) -> void // Alias for assign(value).
   {
     assign(value);
   }
@@ -331,8 +341,8 @@ class array_unsafe
 using size_t    = std::uint32_t;
 using ptrdiff_t = std::int32_t;
 
-static_assert((  (std::numeric_limits<size_t>::digits        >= std::numeric_limits<std::uint16_t>::digits)
-              && (std::numeric_limits<ptrdiff_t>::digits + 1 >= std::numeric_limits<std::uint16_t>::digits)),
+static_assert((  (boost::decimal::detail::numeric_limits<size_t>::digits        >= boost::decimal::detail::numeric_limits<std::uint16_t>::digits)
+              && (boost::decimal::detail::numeric_limits<ptrdiff_t>::digits + 1 >= boost::decimal::detail::numeric_limits<std::uint16_t>::digits)),
               "Error: size type and pointer difference type must be at least 16 bits in width (or wider)");
 
 template<const size_t Width2> struct verify_power_of_two // NOLINT(altera-struct-pack-align)
@@ -355,9 +365,9 @@ template<const size_t BitCount,
 struct uint_type_helper
 {
 private:
-  static constexpr auto bit_count   () -> size_t { return BitCount; }
-  static constexpr auto bit_count_lo() -> size_t { return static_cast<size_t>(UINT8_C(8)); }
-  static constexpr auto bit_count_hi() -> size_t { return static_cast<size_t>(UINT8_C(64)); }
+  static BOOST_DECIMAL_GPU_ENABLED constexpr auto bit_count   () -> size_t { return BitCount; }
+  static BOOST_DECIMAL_GPU_ENABLED constexpr auto bit_count_lo() -> size_t { return static_cast<size_t>(UINT8_C(8)); }
+  static BOOST_DECIMAL_GPU_ENABLED constexpr auto bit_count_hi() -> size_t { return static_cast<size_t>(UINT8_C(64)); }
 
   static_assert((   ((bit_count() >= bit_count_lo()) && (BitCount <= bit_count_hi())) // NOLINT(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers)
                  && (verify_power_of_two<bit_count()>::conditional_value)),
@@ -370,22 +380,22 @@ struct uint_type_helper
   using fast_signed_type    = std::intmax_t;
 };
 
-template<const size_t BitCount> struct uint_type_helper<BitCount, std::enable_if_t<                                                  (BitCount <= static_cast<size_t>(UINT8_C(  8)))>> { using exact_unsigned_type = std::uint8_t;      using exact_signed_type = std::int8_t;     using fast_unsigned_type = std::uint_fast8_t;  using fast_signed_type = std::int_fast8_t;  };
-template<const size_t BitCount> struct uint_type_helper<BitCount, std::enable_if_t<(BitCount >= static_cast<size_t>(UINT8_C( 9))) && (BitCount <= static_cast<size_t>(UINT8_C( 16)))>> { using exact_unsigned_type = std::uint16_t;     using exact_signed_type = std::int16_t;    using fast_unsigned_type = std::uint_fast16_t; using fast_signed_type = std::int_fast16_t; };
-template<const size_t BitCount> struct uint_type_helper<BitCount, std::enable_if_t<(BitCount >= static_cast<size_t>(UINT8_C(17))) && (BitCount <= static_cast<size_t>(UINT8_C( 32)))>> { using exact_unsigned_type = std::uint32_t;     using exact_signed_type = std::int32_t;    using fast_unsigned_type = std::uint_fast32_t; using fast_signed_type = std::int_fast32_t; };
-template<const size_t BitCount> struct uint_type_helper<BitCount, std::enable_if_t<(BitCount >= static_cast<size_t>(UINT8_C(33))) && (BitCount <= static_cast<size_t>(UINT8_C( 64)))>> { using exact_unsigned_type = std::uint64_t;     using exact_signed_type = std::int64_t;    using fast_unsigned_type = std::uint_fast64_t; using fast_signed_type = std::int_fast64_t; };
+template<const size_t BitCount> struct uint_type_helper<BitCount, BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<                                                  (BitCount <= static_cast<size_t>(UINT8_C(  8)))>> { using exact_unsigned_type = std::uint8_t;      using exact_signed_type = std::int8_t;     using fast_unsigned_type = std::uint_fast8_t;  using fast_signed_type = std::int_fast8_t;  };
+template<const size_t BitCount> struct uint_type_helper<BitCount, BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<(BitCount >= static_cast<size_t>(UINT8_C( 9))) && (BitCount <= static_cast<size_t>(UINT8_C( 16)))>> { using exact_unsigned_type = std::uint16_t;     using exact_signed_type = std::int16_t;    using fast_unsigned_type = std::uint_fast16_t; using fast_signed_type = std::int_fast16_t; };
+template<const size_t BitCount> struct uint_type_helper<BitCount, BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<(BitCount >= static_cast<size_t>(UINT8_C(17))) && (BitCount <= static_cast<size_t>(UINT8_C( 32)))>> { using exact_unsigned_type = std::uint32_t;     using exact_signed_type = std::int32_t;    using fast_unsigned_type = std::uint_fast32_t; using fast_signed_type = std::int_fast32_t; };
+template<const size_t BitCount> struct uint_type_helper<BitCount, BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::enable_if_t<(BitCount >= static_cast<size_t>(UINT8_C(33))) && (BitCount <= static_cast<size_t>(UINT8_C( 64)))>> { using exact_unsigned_type = std::uint64_t;     using exact_signed_type = std::int64_t;    using fast_unsigned_type = std::uint_fast64_t; using fast_signed_type = std::int_fast64_t; };
 
-using unsigned_fast_type = typename uint_type_helper<static_cast<size_t>(std::numeric_limits<size_t   >::digits + 0)>::fast_unsigned_type;
-using   signed_fast_type = typename uint_type_helper<static_cast<size_t>(std::numeric_limits<ptrdiff_t>::digits + 1)>::fast_signed_type;
+using unsigned_fast_type = typename uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<size_t   >::digits + 0)>::fast_unsigned_type;
+using   signed_fast_type = typename uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<ptrdiff_t>::digits + 1)>::fast_signed_type;
 
 template<typename InputIterator,
          typename IntegralType>
-constexpr auto advance_and_point(InputIterator it, IntegralType n) -> InputIterator
+BOOST_DECIMAL_GPU_ENABLED constexpr auto advance_and_point(InputIterator it, IntegralType n) -> InputIterator
 {
   using local_signed_integral_type =
-    std::conditional_t<std::is_signed<IntegralType>::value,
+    BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::conditional_t<BOOST_DECIMAL_TYPE_TRAITS_NAMESPACE::is_signed<IntegralType>::value,
                        IntegralType,
-                       typename detail::uint_type_helper<static_cast<size_t>(std::numeric_limits<IntegralType>::digits)>::exact_signed_type>;
+                       typename detail::uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<IntegralType>::digits)>::exact_signed_type>;
 
   using local_difference_type = typename detail::iterator_detail::iterator_traits<InputIterator>::difference_type;
 
@@ -393,8 +403,8 @@ constexpr auto advance_and_point(InputIterator it, IntegralType n) -> InputItera
 }
 
 template<typename UnsignedShortType,
-         typename UnsignedLargeType = typename detail::uint_type_helper<static_cast<size_t>(std::numeric_limits<UnsignedShortType>::digits * 2)>::exact_unsigned_type>
-constexpr auto make_lo(const UnsignedLargeType& u) -> UnsignedShortType
+         typename UnsignedLargeType = typename detail::uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<UnsignedShortType>::digits * 2)>::exact_unsigned_type>
+BOOST_DECIMAL_GPU_ENABLED constexpr auto make_lo(const UnsignedLargeType& u) -> UnsignedShortType
 {
   // From an unsigned integral input parameter of type UnsignedLargeType,
   // extract the low part of it. The type of the extracted
@@ -404,10 +414,10 @@ constexpr auto make_lo(const UnsignedLargeType& u) -> UnsignedShortType
   using local_ularge_type = UnsignedLargeType;
 
   // Compile-time checks.
-  static_assert((    ( std::numeric_limits<local_ushort_type>::is_integer)
-                 &&  ( std::numeric_limits<local_ularge_type>::is_integer)
-                 &&  (!std::numeric_limits<local_ushort_type>::is_signed)
-                 &&  (!std::numeric_limits<local_ularge_type>::is_signed)
+  static_assert((    ( boost::decimal::detail::numeric_limits<local_ushort_type>::is_integer)
+                 &&  ( boost::decimal::detail::numeric_limits<local_ularge_type>::is_integer)
+                 &&  (!boost::decimal::detail::numeric_limits<local_ushort_type>::is_signed)
+                 &&  (!boost::decimal::detail::numeric_limits<local_ularge_type>::is_signed)
                  &&  ((sizeof(local_ushort_type) * 2U) == sizeof(local_ularge_type))),
                  "Error: Please check the characteristics of the template parameters UnsignedShortType and UnsignedLargeType");
 
@@ -415,8 +425,8 @@ constexpr auto make_lo(const UnsignedLargeType& u) -> UnsignedShortType
 }
 
 template<typename UnsignedShortType,
-         typename UnsignedLargeType = typename detail::uint_type_helper<static_cast<size_t>(std::numeric_limits<UnsignedShortType>::digits * 2)>::exact_unsigned_type>
-constexpr auto make_hi(const UnsignedLargeType& u) -> UnsignedShortType
+         typename UnsignedLargeType = typename detail::uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<UnsignedShortType>::digits * 2)>::exact_unsigned_type>
+BOOST_DECIMAL_GPU_ENABLED constexpr auto make_hi(const UnsignedLargeType& u) -> UnsignedShortType
 {
   // From an unsigned integral input parameter of type UnsignedLargeType,
   // extract the high part of it. The type of the extracted
@@ -426,19 +436,19 @@ constexpr auto make_hi(const UnsignedLargeType& u) -> UnsignedShortType
   using local_ularge_type = UnsignedLargeType;
 
   // Compile-time checks.
-  static_assert((    ( std::numeric_limits<local_ushort_type>::is_integer)
-                 &&  ( std::numeric_limits<local_ularge_type>::is_integer)
-                 &&  (!std::numeric_limits<local_ushort_type>::is_signed)
-                 &&  (!std::numeric_limits<local_ularge_type>::is_signed)
+  static_assert((    ( boost::decimal::detail::numeric_limits<local_ushort_type>::is_integer)
+                 &&  ( boost::decimal::detail::numeric_limits<local_ularge_type>::is_integer)
+                 &&  (!boost::decimal::detail::numeric_limits<local_ushort_type>::is_signed)
+                 &&  (!boost::decimal::detail::numeric_limits<local_ularge_type>::is_signed)
                  &&  ((sizeof(local_ushort_type) * 2U) == sizeof(local_ularge_type))),
                  "Error: Please check the characteristics of the template parameters UnsignedShortType and UnsignedLargeType");
 
-  return static_cast<local_ushort_type>(u >> static_cast<local_ushort_type>(std::numeric_limits<local_ushort_type>::digits));
+  return static_cast<local_ushort_type>(u >> static_cast<local_ushort_type>(boost::decimal::detail::numeric_limits<local_ushort_type>::digits));
 }
 
 template<typename UnsignedShortType,
-         typename UnsignedLargeType = typename detail::uint_type_helper<static_cast<size_t>(std::numeric_limits<UnsignedShortType>::digits * 2)>::exact_unsigned_type>
-constexpr auto make_large(const UnsignedShortType& lo, const UnsignedShortType& hi) -> UnsignedLargeType
+         typename UnsignedLargeType = typename detail::uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<UnsignedShortType>::digits * 2)>::exact_unsigned_type>
+BOOST_DECIMAL_GPU_ENABLED constexpr auto make_large(const UnsignedShortType& lo, const UnsignedShortType& hi) -> UnsignedLargeType
 {
   // Create a composite unsigned integral value having type UnsignedLargeType.
   // Two constituents are used having type UnsignedShortType, whereby the
@@ -448,10 +458,10 @@ constexpr auto make_large(const UnsignedShortType& lo, const UnsignedShortType&
   using local_ularge_type = UnsignedLargeType;
 
   // Compile-time checks.
-  static_assert((    ( std::numeric_limits<local_ushort_type>::is_integer)
-                 &&  ( std::numeric_limits<local_ularge_type>::is_integer)
-                 &&  (!std::numeric_limits<local_ushort_type>::is_signed)
-                 &&  (!std::numeric_limits<local_ularge_type>::is_signed)
+  static_assert((    ( boost::decimal::detail::numeric_limits<local_ushort_type>::is_integer)
+                 &&  ( boost::decimal::detail::numeric_limits<local_ularge_type>::is_integer)
+                 &&  (!boost::decimal::detail::numeric_limits<local_ushort_type>::is_signed)
+                 &&  (!boost::decimal::detail::numeric_limits<local_ularge_type>::is_signed)
                  &&  ((sizeof(local_ushort_type) * 2U) == sizeof(local_ularge_type))),
                  "Error: Please check the characteristics of the template parameters UnsignedShortType and UnsignedLargeType");
 
@@ -460,7 +470,7 @@ constexpr auto make_large(const UnsignedShortType& lo, const UnsignedShortType&
     (
         static_cast<local_ularge_type>
         (
-          static_cast<local_ularge_type>(hi) << static_cast<unsigned>(std::numeric_limits<UnsignedShortType>::digits)
+          static_cast<local_ularge_type>(hi) << static_cast<unsigned>(boost::decimal::detail::numeric_limits<UnsignedShortType>::digits)
         )
       | lo
     );
@@ -481,7 +491,7 @@ class uintwide_t
   using limb_type = std::uint32_t;
 
   using double_limb_type =
-    typename detail::uint_type_helper<static_cast<size_t>(static_cast<int>(std::numeric_limits<limb_type>::digits * static_cast<int>(INT8_C(2))))>::exact_unsigned_type;
+    typename detail::uint_type_helper<static_cast<size_t>(static_cast<int>(boost::decimal::detail::numeric_limits<limb_type>::digits * static_cast<int>(INT8_C(2))))>::exact_unsigned_type;
 
   // Legacy ularge and ushort types. These are no longer used
   // in the class, but provided for legacy compatibility.
@@ -489,10 +499,10 @@ class uintwide_t
   using ularge_type = double_limb_type;
 
   // More compile-time checks.
-  static_assert((    ( std::numeric_limits<limb_type>::is_integer)
-                 &&  ( std::numeric_limits<double_limb_type>::is_integer)
-                 &&  (!std::numeric_limits<limb_type>::is_signed)
-                 &&  (!std::numeric_limits<double_limb_type>::is_signed)
+  static_assert((    ( boost::decimal::detail::numeric_limits<limb_type>::is_integer)
+                 &&  ( boost::decimal::detail::numeric_limits<double_limb_type>::is_integer)
+                 &&  (!boost::decimal::detail::numeric_limits<limb_type>::is_signed)
+                 &&  (!boost::decimal::detail::numeric_limits<double_limb_type>::is_signed)
                  &&  ((sizeof(limb_type) * 2U) == sizeof(double_limb_type))),
                  "Error: Please check the characteristics of the template parameters UnsignedShortType and UnsignedLargeType");
 
@@ -503,7 +513,7 @@ class uintwide_t
   static constexpr size_t number_of_limbs =
     static_cast<size_t>
     (
-      Width2 / static_cast<size_t>(std::numeric_limits<limb_type>::digits)
+      Width2 / static_cast<size_t>(boost::decimal::detail::numeric_limits<limb_type>::digits)
     );
 
   // The type of the internal data representation.
@@ -517,11 +527,14 @@ class uintwide_t
 
   // Provide a user interface to the internal data representation.
   BOOST_DECIMAL_WIDE_INTEGER_NODISCARD
-  BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto  representation()       ->       representation_type& { return values; }
+  BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR
+  BOOST_DECIMAL_GPU_ENABLED auto  representation()       ->       representation_type& { return values; } // Mutable reference to the member 'values'.
+
   BOOST_DECIMAL_WIDE_INTEGER_NODISCARD
-  BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto crepresentation() const -> const representation_type& { return values; }
+  BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR
+  BOOST_DECIMAL_GPU_ENABLED auto crepresentation() const -> const representation_type& { return values; } // Read-only reference to the member 'values'.
 
-  BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto eval_divide_knuth(const uintwide_t& other, uintwide_t& remainder) -> void
+  BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR BOOST_DECIMAL_GPU_ENABLED auto eval_divide_knuth(const uintwide_t& other, uintwide_t& remainder) -> void
   {
     using local_uint_index_type = unsigned_fast_type;
 
@@ -593,6 +606,7 @@ class uintwide_t
 
   template<typename ResultIterator,
            typename InputIteratorLeft>
+  BOOST_DECIMAL_GPU_ENABLED
   static BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto eval_multiply_1d(      ResultIterator                                                                   r,
                                                                           InputIteratorLeft                                                                a,
                                                                     const typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type b,
@@ -603,12 +617,12 @@ class uintwide_t
 
     static_assert
     (
-      (std::numeric_limits<local_limb_type>::digits == std::numeric_limits<left_value_type>::digits),
+      (boost::decimal::detail::numeric_limits<local_limb_type>::digits == boost::decimal::detail::numeric_limits<left_value_type>::digits),
       "Error: Internals require same widths for left-right-result limb_types at the moment"
     );
 
     using local_double_limb_type =
-      typename detail::uint_type_helper<static_cast<size_t>(std::numeric_limits<local_limb_type>::digits * 2)>::exact_unsigned_type;
+      typename detail::uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<local_limb_type>::digits * 2)>::exact_unsigned_type;
 
     auto carry = static_cast<local_double_limb_type>(UINT8_C(0));
 
@@ -642,6 +656,7 @@ class uintwide_t
   template<typename ResultIterator,
            typename InputIteratorLeft,
            typename InputIteratorRight>
+  BOOST_DECIMAL_GPU_ENABLED
   static constexpr auto eval_multiply_n_by_n_to_lo_part_128(      ResultIterator     r,
                                                                   InputIteratorLeft  a,
                                                                   InputIteratorRight b,
@@ -653,13 +668,13 @@ class uintwide_t
 
     static_assert
     (
-         (std::numeric_limits<local_limb_type>::digits == std::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type>::digits)
-      && (std::numeric_limits<local_limb_type>::digits == std::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorRight>::value_type>::digits),
+         (boost::decimal::detail::numeric_limits<local_limb_type>::digits == boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type>::digits)
+      && (boost::decimal::detail::numeric_limits<local_limb_type>::digits == boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorRight>::value_type>::digits),
       "Error: Internals require same widths for left-right-result limb_types at the moment"
     );
 
     using local_double_limb_type =
-      typename detail::uint_type_helper<static_cast<size_t>(static_cast<int>(std::numeric_limits<local_limb_type>::digits * static_cast<int>(INT8_C(2))))>::exact_unsigned_type;
+      typename detail::uint_type_helper<static_cast<size_t>(static_cast<int>(boost::decimal::detail::numeric_limits<local_limb_type>::digits * static_cast<int>(INT8_C(2))))>::exact_unsigned_type;
 
     using result_difference_type = typename detail::iterator_detail::iterator_traits<ResultIterator>::difference_type;
     using left_difference_type   = typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::difference_type;
@@ -782,6 +797,7 @@ class uintwide_t
   template<typename ResultIterator,
            typename InputIteratorLeft,
            typename InputIteratorRight>
+  BOOST_DECIMAL_GPU_ENABLED
   static BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto eval_multiply_n_by_n_to_lo_part_256(      ResultIterator     r,
                                                                                              InputIteratorLeft  a,
                                                                                              InputIteratorRight b,
@@ -791,15 +807,15 @@ class uintwide_t
 
     static_assert
     (
-          (std::numeric_limits<typename detail::iterator_detail::iterator_traits<ResultIterator>::value_type>::digits == std::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type>::digits)
-      && (std::numeric_limits<typename detail::iterator_detail::iterator_traits<ResultIterator>::value_type>::digits == std::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorRight>::value_type>::digits),
+          (boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<ResultIterator>::value_type>::digits == boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type>::digits)
+      && (boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<ResultIterator>::value_type>::digits == boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorRight>::value_type>::digits),
       "Error: Internals require same widths for left-right-result limb_types at the moment"
     );
 
     using local_limb_type = typename detail::iterator_detail::iterator_traits<ResultIterator>::value_type;
 
     using local_double_limb_type =
-      typename detail::uint_type_helper<static_cast<size_t>(static_cast<int>(std::numeric_limits<local_limb_type>::digits * static_cast<int>(INT8_C(2))))>::exact_unsigned_type;
+      typename detail::uint_type_helper<static_cast<size_t>(static_cast<int>(boost::decimal::detail::numeric_limits<local_limb_type>::digits * static_cast<int>(INT8_C(2))))>::exact_unsigned_type;
 
     using result_difference_type = typename detail::iterator_detail::iterator_traits<ResultIterator>::difference_type;
     using left_difference_type   = typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::difference_type;
@@ -1077,13 +1093,14 @@ class uintwide_t
 private:
   representation_type values { };
 
-  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto compare(const uintwide_t& other) const -> std::int_fast8_t
+  BOOST_DECIMAL_WIDE_INTEGER_NODISCARD BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR BOOST_DECIMAL_GPU_ENABLED auto compare(const uintwide_t& other) const -> std::int_fast8_t
   {
     return compare_ranges(values.cbegin(), other.values.cbegin(), number_of_limbs);
   }
 
   template<typename InputIteratorLeftType,
            typename InputIteratorRightType>
+  BOOST_DECIMAL_GPU_ENABLED
   static BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto compare_ranges(      InputIteratorLeftType  a,
                                                                         InputIteratorRightType b,
                                                                   const unsigned_fast_type     count) -> std::int_fast8_t
@@ -1120,6 +1137,7 @@ class uintwide_t
   template<typename ResultIterator,
            typename InputIteratorLeft,
            typename InputIteratorRight>
+  BOOST_DECIMAL_GPU_ENABLED
   static BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto eval_add_n(      ResultIterator     r,
                                                                     InputIteratorLeft  u,
                                                                     InputIteratorRight v,
@@ -1132,13 +1150,13 @@ class uintwide_t
 
     static_assert
     (
-         (std::numeric_limits<local_limb_type>::digits == std::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type>::digits)
-      && (std::numeric_limits<local_limb_type>::digits == std::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorRight>::value_type>::digits),
+         (boost::decimal::detail::numeric_limits<local_limb_type>::digits == boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type>::digits)
+      && (boost::decimal::detail::numeric_limits<local_limb_type>::digits == boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorRight>::value_type>::digits),
       "Error: Internals require same widths for left-right-result limb_types at the moment"
     );
 
     using local_double_limb_type =
-      typename detail::uint_type_helper<static_cast<size_t>(std::numeric_limits<local_limb_type>::digits * 2)>::exact_unsigned_type;
+      typename detail::uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<local_limb_type>::digits * 2)>::exact_unsigned_type;
 
     using result_difference_type = typename detail::iterator_detail::iterator_traits<ResultIterator>::difference_type;
 
@@ -1162,6 +1180,7 @@ class uintwide_t
   template<typename ResultIterator,
            typename InputIteratorLeft,
            typename InputIteratorRight>
+  BOOST_DECIMAL_GPU_ENABLED
   static BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto eval_subtract_n(      ResultIterator     r,
                                                                          InputIteratorLeft  u,
                                                                          InputIteratorRight v,
@@ -1179,13 +1198,13 @@ class uintwide_t
 
     static_assert
     (
-         (std::numeric_limits<local_limb_type>::digits == std::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type>::digits)
-      && (std::numeric_limits<local_limb_type>::digits == std::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorRight>::value_type>::digits),
+         (boost::decimal::detail::numeric_limits<local_limb_type>::digits == boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorLeft>::value_type>::digits)
+      && (boost::decimal::detail::numeric_limits<local_limb_type>::digits == boost::decimal::detail::numeric_limits<typename detail::iterator_detail::iterator_traits<InputIteratorRight>::value_type>::digits),
       "Error: Internals require same widths for left-right-result limb_types at the moment"
     );
 
     using local_double_limb_type =
-      typename detail::uint_type_helper<static_cast<size_t>(std::numeric_limits<local_limb_type>::digits * 2)>::exact_unsigned_type;
+      typename detail::uint_type_helper<static_cast<size_t>(boost::decimal::detail::numeric_limits<local_limb_type>::digits * 2)>::exact_unsigned_type;
 
     using result_difference_type = typename detail::iterator_detail::iterator_traits<ResultIterator>::difference_type;
 
@@ -1212,6 +1231,7 @@ class uintwide_t
     return (has_borrow_out != static_cast<std::uint_fast8_t>(UINT8_C(0)));
   }
 
+  BOOST_DECIMAL_GPU_ENABLED
   BOOST_DECIMAL_WIDE_INTEGER_CONSTEXPR auto eval_divide_knuth_core(const unsigned_fast_type u_offset, // NOLINT(readability-function-cognitive-complexity)
                                                                    const unsigned_fast_type v_offset,
                                                                    const uintwide_t& other,
@@ -1234,7 +1254,7 @@ class uintwide_t
     const auto d =
       static_cast<limb_type>
       (
-          static_cast<double_limb_type>(static_cast<double_limb_type>(UINT8_C(1)) << static_cast<unsigned>(std::numeric_limits<limb_type>::digits))
+          static_cast<double_limb_type>(static_cast<double_limb_type>(UINT8_C(1)) << static_cast<unsigned>(boost::decimal::detail::numeric_limits<limb_type>::digits))
         / static_cast<double_limb_type>(static_cast<double_limb_type>(*detail::advance_and_point(other.values.cbegin(), static_cast<size_t>(static_cast<local_uint_index_type>(number_of_limbs - 1U) - v_offset))) + static_cast<limb_type>(1U))
       );
 
@@ -1311,13 +1331,13 @@ class uintwide_t
       //     set q_hat = (u[j] * b + u[j + 1]) / v[1]
 
       const auto uj     = static_cast<local_uint_index_type>(static_cast<local_uint_index_type>(static_cast<local_uint_index_type>(static_cast<local_uint_index_type>(number_of_limbs + 1U) - 1U) - u_offset) - j);
-      const auto u_j_j1 = static_cast<double_limb_type>(static_cast<double_limb_type>(static_cast<double_limb_type>(*(uu.cbegin() + static_cast<size_t>(uj))) << static_cast<unsigned>(std::numeric_limits<limb_type>::digits)) + *(uu.cbegin() + static_cast<size_t>(uj - 1U)));
+      const auto u_j_j1 = static_cast<double_limb_type>(static_cast<double_limb_type>(static_cast<double_limb_type>(*(uu.cbegin() + static_cast<size_t>(uj))) << static_cast<unsigned>(boost::decimal::detail::numeric_limits<limb_type>::digits)) + *(uu.cbegin() + static_cast<size_t>(uj - 1U)));
 
       auto q_hat =
         static_cast<limb_type>
         (
           (*(uu.cbegin() + static_cast<size_t>(uj)) == vv_at_vj0)
-            ? (std::numeric_limits<limb_type>::max)()
+            ? (boost::decimal::detail::numeric_limits<limb_type>::max)()
             : static_cast<limb_type>(u_j_j1 / vv_at_vj0)
         );
 
@@ -1332,7 +1352,7 @@ class uintwide_t
       {
         if(   (detail::make_hi<limb_type>(t) != static_cast<limb_type>(UINT8_C(0)))
             || (   static_cast<double_limb_type>(static_cast<double_limb_type>(vv_at_vj0_minus_one) * q_hat)
-                <= static_cast<double_limb_type>(static_cast<double_limb_type>(t << static_cast<unsigned>(std::numeric_limits<limb_type>::digits)) + *detail::advance_and_point(uu.cbegin(), static_cast<size_t>(uj - 2U)))))
+                <= static_cast<double_limb_type>(static_cast<double_limb_type>(t << static_cast<unsigned>(boost::decimal::detail::numeric_limits<limb_type>::digits)) + *detail::advance_and_point(uu.cbegin(), static_cast<size_t>(uj - 2U)))))
         {
           break;
         }
@@ -1437,7 +1457,7 @@ class uintwide_t
                 *(uu.cbegin() + static_cast<size_t>(ul))
               + static_cast<double_limb_type>
                 (
-                  static_cast<double_limb_type>(previous_u) << static_cast<unsigned>(std::numeric_limits<limb_type>::digits)
+                  static_cast<double_limb_type>(previous_u) << static_cast<unsigned>(boost::decimal::detail::numeric_limits<limb_type>::digits)
                 )
             );
 
diff --git a/include/boost/decimal/fenv.hpp b/include/boost/decimal/fenv.hpp
index e46b44b43..bbed9d809 100644
--- a/include/boost/decimal/fenv.hpp
+++ b/include/boost/decimal/fenv.hpp
@@ -7,7 +7,7 @@
 
 #include <boost/decimal/detail/config.hpp>
 
-#ifndef BOOST_DECIMAL_BUILD_MODULE
+#if !defined(BOOST_DECIMAL_BUILD_MODULE) && !defined(BOOST_DECIMAL_ENABLE_CUDA) // <cfenv> is skipped for module and CUDA builds
 #include <cfenv>
 #endif
 
@@ -24,6 +24,8 @@ BOOST_DECIMAL_EXPORT enum class rounding_mode : unsigned
     fe_dec_default = fe_dec_to_nearest_from_zero
 };
 
+#ifndef BOOST_DECIMAL_ENABLE_CUDA
+
 BOOST_DECIMAL_INLINE_VARIABLE rounding_mode _boost_decimal_global_rounding_mode {rounding_mode::fe_dec_default};
 
 BOOST_DECIMAL_EXPORT inline auto fegetround() noexcept -> rounding_mode
@@ -40,6 +42,22 @@ BOOST_DECIMAL_EXPORT inline auto fesetround(rounding_mode round) noexcept -> rou
     return round;
 }
 
+#else
+
+// CUDA builds keep no global rounding state, so the rounding mode is fixed:
+// both functions always report rounding_mode::fe_dec_default.
+BOOST_DECIMAL_GPU_ENABLED inline auto fegetround() noexcept -> rounding_mode
+{
+    return rounding_mode::fe_dec_default;
+}
+
+BOOST_DECIMAL_GPU_ENABLED inline auto fesetround(rounding_mode) noexcept -> rounding_mode
+{
+    return fegetround(); // the requested mode is ignored
+}
+
+#endif
+
 } // namespace decimal
 } // namespace boost
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index eed71eaa6..e39866425 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -7,6 +7,30 @@ include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST)
 
 if(HAVE_BOOST_TEST)
 
-    boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::decimal Boost::core Boost::math Boost::multiprecision Boost::charconv Boost::type_traits)
+    # Enable CTest once so that every configuration below registers its tests.
+    enable_testing()
+
+    if(BOOST_DECIMAL_ENABLE_CUDA)
+
+        message(STATUS "Building boost.decimal with CUDA")
+        enable_language(CUDA)
+        set(CMAKE_CUDA_EXTENSIONS OFF)
+
+        boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::decimal Boost::core Boost::math Boost::multiprecision Boost::charconv Boost::type_traits )
+
+    elseif(BOOST_DECIMAL_ENABLE_SYCL)
+
+        message(STATUS "Building boost.decimal with SYCL")
+        # NOTE(review): compilers should be selected before the first project() call; confirm this late override takes effect.
+        set(CMAKE_CXX_COMPILER "icpx")
+        set(CMAKE_C_COMPILER "icx")
+
+        boost_test_jamfile(FILE sycl_jamfile LINK_LIBRARIES Boost::decimal Boost::core Boost::math Boost::multiprecision Boost::charconv Boost::type_traits sycl COMPILE_OPTIONS -fsycl  )
+
+    else()
+
+        boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::decimal Boost::core Boost::math Boost::multiprecision Boost::charconv Boost::type_traits )
+
+    endif()
 
 endif()
diff --git a/test/cuda_jamfile b/test/cuda_jamfile
new file mode 100644
index 000000000..5d7475916
--- /dev/null
+++ b/test/cuda_jamfile
@@ -0,0 +1,17 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# https://www.boost.org/LICENSE_1_0.txt
+
+import testing ;
+import ../../config/checks/config : requires ;
+# Requirements applied to every target in this project.
+project : requirements
+    [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
+    ;
+
+# Basic operations: one run target per CUDA (.cu) test source.
+run test_decimal32_fast_add.cu ;
+run test_decimal32_fast_div.cu ;
+run test_decimal32_fast_mul.cu ;
+run test_decimal32_fast_sub.cu ;
+run test_decimal32_fast_non_finite.cu ;
diff --git a/test/cuda_managed_ptr.hpp b/test/cuda_managed_ptr.hpp
new file mode 100644
index 000000000..3d0f3e800
--- /dev/null
+++ b/test/cuda_managed_ptr.hpp
@@ -0,0 +1,143 @@
+
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_CUDA_MANAGED_PTR_HPP
+#define BOOST_MATH_CUDA_MANAGED_PTR_HPP
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include <cstddef>
+#include <iostream>
+#include <stdexcept>
+
+#include <cuda_runtime.h>
+
+// Base of managed_holder / const_managed_holder: keeps a count of live
+// holders and calls cudaDeviceSynchronize() when the last one is destroyed.
+class managed_holder_base
+{
+protected:
+   static int count;   // number of holders currently alive
+   managed_holder_base() { count += 1; }
+   ~managed_holder_base()
+   {
+      if(--count == 0) { cudaDeviceSynchronize(); }
+   }
+};
+int managed_holder_base::count = 0;
+
+//
+// Reset the device and exit:
+// cudaDeviceReset causes the driver to clean up all state. While
+// not mandatory in normal operation, it is good practice.  It is also
+// needed to ensure correct operation when the application is being
+// profiled. Calling cudaDeviceReset causes all profile data to be
+// flushed before the application exits.
+//
+// We have a global instance of this class, plus instances for each
+// managed pointer.  Last one out the door switches the lights off.
+//
+class cudaResetter
+{
+   static int count;   // number of cudaResetter objects currently alive
+public:
+   cudaResetter() { count += 1; }
+   ~cudaResetter()
+   {
+      count -= 1;
+      if(count != 0)
+         return;   // other instances are still alive
+      // Last instance destroyed: reset the device and report a failure, if any.
+      const cudaError_t status = cudaDeviceReset();
+      if(status == cudaSuccess)
+         return;
+      std::cerr << "Failed to deinitialize the device! error=" << cudaGetErrorString(status) << std::endl;
+   }
+};
+
+int cudaResetter::count = 0;
+
+cudaResetter global_resetter;
+
+// Move-only owner of an array of T obtained from cudaMallocManaged() and released with cudaFree().
+template <class T>
+class cuda_managed_ptr
+{
+   T* data;
+   static const cudaResetter resetter;
+   cuda_managed_ptr(const cuda_managed_ptr&) = delete;
+   cuda_managed_ptr& operator=(cuda_managed_ptr const&) = delete;
+   // Synchronizes the device, releases the allocation (if any) and leaves data null.
+   void free()
+   {
+      if(data)
+      {
+         cudaDeviceSynchronize();
+         const cudaError_t err = cudaFree(data);
+         data = nullptr;
+         if(err != cudaSuccess)
+            std::cerr << "Failed to free managed memory! error=" << cudaGetErrorString(err) << std::endl;
+      }
+   }
+public:
+   cuda_managed_ptr() : data(nullptr) {}
+   // Allocates managed memory for n objects of T; throws std::runtime_error if the allocation fails.
+   cuda_managed_ptr(std::size_t n) : data(nullptr)
+   {
+      void* ptr = nullptr;
+      const cudaError_t err = cudaMallocManaged(&ptr, n * sizeof(T));
+      if(err != cudaSuccess)
+         throw std::runtime_error(cudaGetErrorString(err));
+      cudaDeviceSynchronize();
+      data = static_cast<T*>(ptr);
+   }
+   cuda_managed_ptr(cuda_managed_ptr&& o) noexcept : data(o.data) { o.data = nullptr; }
+   cuda_managed_ptr& operator=(cuda_managed_ptr&& o) noexcept
+   {
+      if(this != &o)   // a self-move must not free the only allocation
+      {
+         free();
+         data = o.data;
+         o.data = nullptr;
+      }
+      return *this;
+   }
+   ~cuda_managed_ptr() { free(); }
+
+   // Non-owning mutable view of the allocation, convertible to T*.
+   class managed_holder : managed_holder_base
+   {
+      T* pdata;
+   public:
+      managed_holder(T* p) : managed_holder_base(), pdata(p) {}
+      managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {}
+      operator T* () const { return pdata; }   // const, so a const holder can be converted as well
+      T& operator[] (std::size_t n) { return pdata[n]; }
+   };
+   // Non-owning read-only view of the allocation, convertible to const T*.
+   class const_managed_holder : managed_holder_base
+   {
+      const T* pdata;
+   public:
+      const_managed_holder(T* p) : managed_holder_base(), pdata(p) {}
+      // Uses the public conversion: managed_holder::pdata is private to that class.
+      const_managed_holder(const managed_holder& o) : managed_holder_base(), pdata(static_cast<T*>(o)) {}
+      operator const T* () { return pdata; }
+      const T& operator[] (std::size_t n) { return pdata[n]; }
+   };
+
+   managed_holder get() { return managed_holder(data); }
+   const_managed_holder get()const { return data; }
+   T& operator[](std::size_t n) { return data[n]; }
+   const T& operator[](std::size_t n)const { return data[n]; }
+};
+
+template <class T>
+cudaResetter const cuda_managed_ptr<T>::resetter;
+
+#endif
diff --git a/test/stopwatch.hpp b/test/stopwatch.hpp
new file mode 100644
index 000000000..9f3c60de8
--- /dev/null
+++ b/test/stopwatch.hpp
@@ -0,0 +1,39 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_CUDA_STOPWATCH_HPP
+#define BOOST_MATH_CUDA_STOPWATCH_HPP
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include <chrono>
+
+// Wall-clock timer: started on construction or reset(), read in seconds via elapsed().
+template <class Clock>
+struct stopwatch
+{
+    using duration = typename Clock::duration;
+    // Starts timing immediately.
+    stopwatch() : m_start(Clock::now()) {}
+    // Seconds since construction or the most recent reset().
+    double elapsed()
+    {
+        using seconds = std::chrono::duration<double>;
+        const duration ticks = Clock::now() - m_start;
+        return std::chrono::duration_cast<seconds>(ticks).count();
+    }
+
+    // Restarts the measurement from the current instant.
+    void reset() { m_start = Clock::now(); }
+
+private:
+    typename Clock::time_point m_start;
+};
+
+using watch = stopwatch<std::chrono::high_resolution_clock>;
+
+#endif
diff --git a/test/sycl_jamfile b/test/sycl_jamfile
new file mode 100644
index 000000000..ae83c3150
--- /dev/null
+++ b/test/sycl_jamfile
@@ -0,0 +1,13 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# https://www.boost.org/LICENSE_1_0.txt
+
+import testing ;
+import ../../config/checks/config : requires ;
+# Requirements applied to every target in this project.
+project : requirements
+    [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
+    ;
+
+# Basic operations built for the SYCL configuration.
+run random_decimal32_fast_comp.cpp ;
diff --git a/test/test_decimal32_fast_add.cu b/test/test_decimal32_fast_add.cu
new file mode 100644
index 000000000..e3192e869
--- /dev/null
+++ b/test/test_decimal32_fast_add.cu
@@ -0,0 +1,109 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma nv_diag_suppress 186
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <boost/decimal.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using float_type = boost::decimal::decimal32_fast;
+
+/**
+ * CUDA kernel: the thread with global index i writes out[i] = in[i] + in[i].
+ * Threads whose index is not below numElements do nothing.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Global index of this thread across all blocks
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = in[i] + in[i];
+    }
+}
+
+/**
+ * Host driver: fills a managed buffer with pseudo-random integers, runs
+ * cuda_test on the device and compares each result with the host's own sum.
+ */
+int main(void)
+{
+    // Problem size
+    const int element_count = 50000;
+    std::cout << "[Vector operation on " << element_count << " elements]" << std::endl;
+
+    // Managed buffer read by the kernel
+    cuda_managed_ptr<float_type> input_vector(element_count);
+
+    // Managed buffer written by the kernel
+    cuda_managed_ptr<float_type> output_vector(element_count);
+
+    // Fixed seed, so every run uses the same operands
+    std::mt19937_64 generator(42);
+    std::uniform_int_distribution<int> operand_dist(-1000, 1000);
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        input_vector[idx] = static_cast<float_type>(operand_dist(generator));
+    }
+
+    // Launch geometry: enough blocks of 1024 threads to cover every element
+    const int block_size = 1024;
+    const int grid_size = (element_count + block_size - 1) / block_size;
+    std::cout << "CUDA kernel launch with " << grid_size << " blocks of " << block_size << " threads" << std::endl;
+
+    // The timer covers the launch and the wait for the device to finish
+    watch timer;
+
+    cuda_test<<<grid_size, block_size>>>(input_vector.get(), output_vector.get(), element_count);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << timer.elapsed() << "s" << std::endl;
+
+    // Ask the runtime whether an error was recorded
+    const cudaError_t status = cudaGetLastError();
+    if (status != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(status) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference values, timed for comparison with the kernel
+    std::vector<float_type> expected;
+    expected.reserve(element_count);
+    timer.reset();
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        expected.push_back(input_vector[idx] + input_vector[idx]);
+    }
+    const double host_seconds = timer.elapsed();
+
+    // Compare element by element and stop at the first mismatch
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        if (output_vector[idx] != expected[idx])
+        {
+            std::cerr << "Result verification failed at element " << idx << "!\n"
+                      << "Cuda: " << output_vector[idx] << '\n'
+                      << "Serial: " << expected[idx] << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << host_seconds << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_decimal32_fast_div.cu b/test/test_decimal32_fast_div.cu
new file mode 100644
index 000000000..b0c503304
--- /dev/null
+++ b/test/test_decimal32_fast_div.cu
@@ -0,0 +1,113 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma nv_diag_suppress 186
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <boost/decimal.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using float_type = boost::decimal::decimal32_fast;
+
+/**
+ * CUDA kernel: the thread with global index i writes out[i] = in1[i] / in2[i].
+ * Threads whose index is not below numElements do nothing.
+ */
+__global__ void cuda_test(const float_type* in1, const float_type* in2, float_type *out, int numElements)
+{
+    // Global index of this thread across all blocks
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = in1[i] / in2[i];
+    }
+}
+
+/**
+ * Host driver: fills two managed buffers with pseudo-random integers, runs
+ * cuda_test on the device and compares each quotient with the host's own.
+ */
+int main(void)
+{
+    // Problem size
+    const int element_count = 50000;
+    std::cout << "[Vector operation on " << element_count << " elements]" << std::endl;
+
+    // Managed buffer holding the dividends
+    cuda_managed_ptr<float_type> input_vector1(element_count);
+
+    // Managed buffer holding the divisors
+    cuda_managed_ptr<float_type> input_vector2(element_count);
+
+    // Managed buffer written by the kernel
+    cuda_managed_ptr<float_type> output_vector(element_count);
+
+    // Fixed seed, so every run uses the same operands
+    std::mt19937_64 generator(42);
+    std::uniform_int_distribution<int> operand_dist(-1000, 1000);
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        input_vector1[idx] = static_cast<float_type>(operand_dist(generator));
+        input_vector2[idx] = static_cast<float_type>(operand_dist(generator));
+    }
+
+    // Launch geometry: enough blocks of 1024 threads to cover every element
+    const int block_size = 1024;
+    const int grid_size = (element_count + block_size - 1) / block_size;
+    std::cout << "CUDA kernel launch with " << grid_size << " blocks of " << block_size << " threads" << std::endl;
+
+    // The timer covers the launch and the wait for the device to finish
+    watch timer;
+
+    cuda_test<<<grid_size, block_size>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), element_count);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << timer.elapsed() << "s" << std::endl;
+
+    // Ask the runtime whether an error was recorded
+    const cudaError_t status = cudaGetLastError();
+    if (status != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(status) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference values, timed for comparison with the kernel
+    std::vector<float_type> expected;
+    expected.reserve(element_count);
+    timer.reset();
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        expected.push_back(input_vector1[idx] / input_vector2[idx]);
+    }
+    const double host_seconds = timer.elapsed();
+
+    // Compare element by element and stop at the first mismatch
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        if (output_vector[idx] != expected[idx])
+        {
+            std::cerr << "Result verification failed at element " << idx << "!\n"
+                      << "Cuda: " << output_vector[idx] << '\n'
+                      << "Serial: " << expected[idx] << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << host_seconds << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_decimal32_fast_mul.cu b/test/test_decimal32_fast_mul.cu
new file mode 100644
index 000000000..b5d6b6cb4
--- /dev/null
+++ b/test/test_decimal32_fast_mul.cu
@@ -0,0 +1,109 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma nv_diag_suppress 186
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <boost/decimal.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using float_type = boost::decimal::decimal32_fast;
+
+/**
+ * CUDA kernel: the thread with global index i writes out[i] = in[i] * in[i].
+ * Threads whose index is not below numElements do nothing.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Global index of this thread across all blocks
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = in[i] * in[i];
+    }
+}
+
+/**
+ * Host driver: fills a managed buffer with pseudo-random integers, runs
+ * cuda_test on the device and compares each result with the host's own product.
+ */
+int main(void)
+{
+    // Problem size
+    const int element_count = 50000;
+    std::cout << "[Vector operation on " << element_count << " elements]" << std::endl;
+
+    // Managed buffer read by the kernel
+    cuda_managed_ptr<float_type> input_vector(element_count);
+
+    // Managed buffer written by the kernel
+    cuda_managed_ptr<float_type> output_vector(element_count);
+
+    // Fixed seed, so every run uses the same operands
+    std::mt19937_64 generator(42);
+    std::uniform_int_distribution<int> operand_dist(-1000, 1000);
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        input_vector[idx] = static_cast<float_type>(operand_dist(generator));
+    }
+
+    // Launch geometry: enough blocks of 1024 threads to cover every element
+    const int block_size = 1024;
+    const int grid_size = (element_count + block_size - 1) / block_size;
+    std::cout << "CUDA kernel launch with " << grid_size << " blocks of " << block_size << " threads" << std::endl;
+
+    // The timer covers the launch and the wait for the device to finish
+    watch timer;
+
+    cuda_test<<<grid_size, block_size>>>(input_vector.get(), output_vector.get(), element_count);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << timer.elapsed() << "s" << std::endl;
+
+    // Ask the runtime whether an error was recorded
+    const cudaError_t status = cudaGetLastError();
+    if (status != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(status) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference values, timed for comparison with the kernel
+    std::vector<float_type> expected;
+    expected.reserve(element_count);
+    timer.reset();
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        expected.push_back(input_vector[idx] * input_vector[idx]);
+    }
+    const double host_seconds = timer.elapsed();
+
+    // Compare element by element and stop at the first mismatch
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        if (output_vector[idx] != expected[idx])
+        {
+            std::cerr << "Result verification failed at element " << idx << "!\n"
+                      << "Cuda: " << output_vector[idx] << '\n'
+                      << "Serial: " << expected[idx] << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << host_seconds << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_decimal32_fast_non_finite.cu b/test/test_decimal32_fast_non_finite.cu
new file mode 100644
index 000000000..c672cbaa6
--- /dev/null
+++ b/test/test_decimal32_fast_non_finite.cu
@@ -0,0 +1,106 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma nv_diag_suppress 186
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/decimal.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using float_type = boost::decimal::decimal32_fast;
+
+/**
+ * CUDA kernel: the thread with global index i stores in out[i] the sum of the
+ * six classification results (signbit, isinf, isnan, ...) for in[i].
+ */
+__global__ void cuda_test(const float_type *in, int *out, int numElements)
+{
+    // Global index of this thread across all blocks; out-of-range threads do nothing
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = static_cast<int>(signbit(in[i]) + isinf(in[i]) + isnan(in[i]) + issignaling(in[i]) + isnormal(in[i]) + isfinite(in[i]));
+    }
+}
+
+/**
+ * Host driver: classifies rand()-generated values on the device and on the host and compares.
+ * NOTE(review): every input is a finite integer, so inf/nan inputs are never exercised here.
+ */
+int main(void)
+{
+    // Problem size
+    const int element_count = 50000;
+    std::cout << "[Vector operation on " << element_count << " elements]" << std::endl;
+
+    // Managed buffer read by the kernel
+    cuda_managed_ptr<float_type> input_vector(element_count);
+
+    // Managed buffer of per-element classification sums written by the kernel
+    cuda_managed_ptr<int> output_vector(element_count);
+
+    // rand() is never seeded here, so each run produces the same sequence
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        input_vector[idx] = static_cast<float_type>(rand());
+    }
+
+    // Launch geometry: enough blocks of 1024 threads to cover every element
+    const int block_size = 1024;
+    const int grid_size = (element_count + block_size - 1) / block_size;
+    std::cout << "CUDA kernel launch with " << grid_size << " blocks of " << block_size << " threads" << std::endl;
+
+    // The timer covers the launch and the wait for the device to finish
+    watch timer;
+
+    cuda_test<<<grid_size, block_size>>>(input_vector.get(), output_vector.get(), element_count);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << timer.elapsed() << "s" << std::endl;
+
+    // Ask the runtime whether an error was recorded
+    const cudaError_t status = cudaGetLastError();
+    if (status != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(status) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference values, timed for comparison with the kernel
+    std::vector<int> expected;
+    expected.reserve(element_count);
+    timer.reset();
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        expected.push_back(static_cast<int>(signbit(input_vector[idx]) + isinf(input_vector[idx]) + isnan(input_vector[idx]) + issignaling(input_vector[idx]) + isnormal(input_vector[idx]) + isfinite(input_vector[idx])));
+    }
+    const double host_seconds = timer.elapsed();
+
+    // Compare element by element and stop at the first mismatch
+    for (int idx = 0; idx < element_count; ++idx)
+    {
+        if (output_vector[idx] != expected[idx])
+        {
+            std::cerr << "Result verification failed at element " << idx << "!\n"
+                      << "Cuda: " << output_vector[idx] << '\n'
+                      << "Serial: " << expected[idx] << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << host_seconds << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_decimal32_fast_sub.cu b/test/test_decimal32_fast_sub.cu
new file mode 100644
index 000000000..cf3d15472
--- /dev/null
+++ b/test/test_decimal32_fast_sub.cu
@@ -0,0 +1,109 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma nv_diag_suppress 186
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <boost/decimal.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using float_type = boost::decimal::decimal32_fast;
+
+/**
+ * CUDA kernel: each thread writes in[i] - in[i] to out[i] for its own
+ * global index i; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Flatten (block, thread) into a single element index
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = in[i] - in[i];
+    }
+}
+
+/**
+ * Host main routine: runs the subtraction kernel and checks every element
+ * against the same expression evaluated on the host.
+ */
+int main(void)
+{
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input and output vectors
+    cuda_managed_ptr<float_type> input_vector(numElements);
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Fixed seed keeps the inputs reproducible from run to run
+    std::mt19937_64 rng(42);
+    std::uniform_int_distribution<int> dist(-1000, 1000);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = static_cast<float_type>(dist(rng));
+    }
+
+    // One thread per element; round the block count up to cover the tail
+    const int threadsPerBlock = 1024;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    // Launch-time problems (e.g. a bad configuration) are reported by cudaGetLastError
+    cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Errors raised while the kernel runs come back from the synchronize call
+    err = cudaDeviceSynchronize();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "cuda_test kernel execution failed (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    // Compute the reference values on the host, timing the serial pass
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        results.push_back(input_vector[i] - input_vector[i]);
+    }
+    const double t = w.elapsed();
+    // Stop at the first element where device and host disagree
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Cuda: " << output_vector[i] << '\n'
+                      << "Serial: " << results[i] << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}