diff --git a/dash/include/dash/algorithm/Copy.h b/dash/include/dash/algorithm/Copy.h index faaa03b26..4c260ed4f 100644 --- a/dash/include/dash/algorithm/Copy.h +++ b/dash/include/dash/algorithm/Copy.h @@ -624,25 +624,154 @@ copy_async( } #endif +struct ActiveDestination{}; +struct ActiveSource{}; + +/** + * Specialization of \c dash::copy as global-to-global blocking copy + * operation. + * + * \ingroup DashAlgorithms + */ +template < + class GlobInputIt, + class GlobOutputIt, + typename ValueType = typename GlobInputIt::value_type> +GlobOutputIt copy( + GlobInputIt in_first, + GlobInputIt in_last, + GlobOutputIt out_first, + ActiveDestination /*unused*/) +{ + DASH_LOG_TRACE("dash::copy()", "blocking, global to global"); + + typedef typename GlobInputIt::size_type size_type; + + size_type num_elem_total = dash::distance(in_first, in_last); + if (num_elem_total <= 0) { + DASH_LOG_TRACE("dash::copy", "input range empty"); + return out_first; + } + + auto g_out_first = out_first; + auto g_out_last = g_out_first + num_elem_total; + + internal::ContiguousRangeSet range_set{g_out_first, g_out_last}; + + const auto & out_team = out_first.team(); + out_team.barrier(); + + std::vector handles; + internal::local_copy_chunks local_chunks; + + size_type num_elem_processed = 0; + + for (auto range : range_set) { + + auto cur_out_first = range.first; + auto num_copy_elem = range.second; + + DASH_ASSERT_GT(num_copy_elem, 0, + "Number of elements to copy is 0"); + + // handle local data only + if (cur_out_first.is_local()) { + auto dest_ptr = cur_out_first.local(); + auto src_ptr = in_first + num_elem_processed; + internal::copy_impl(src_ptr, + src_ptr + num_copy_elem, + dest_ptr, + &handles, + local_chunks); + } + num_elem_processed += num_copy_elem; + } + + internal::do_local_copies(local_chunks); + + if (!handles.empty()) { + DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,", + "num_handles: ", handles.size()); + dart_waitall_local(handles.data(), handles.size()); + } + out_team.barrier(); + + DASH_ASSERT_EQ(num_elem_processed, num_elem_total, + "Failed to find all contiguous subranges in range"); + + return g_out_last; +} + /** * Specialization of \c dash::copy as global-to-global blocking copy * operation. * * \ingroup DashAlgorithms */ -template +template < + class GlobInputIt, + class GlobOutputIt, + typename ValueType = typename GlobInputIt::value_type> GlobOutputIt copy( - GlobInputIt /*in_first*/, - GlobInputIt /*in_last*/, - GlobOutputIt /*out_first*/) + GlobInputIt in_first, + GlobInputIt in_last, + GlobOutputIt out_first, + ActiveSource /*unused*/) { DASH_LOG_TRACE("dash::copy()", "blocking, global to global"); - // TODO: - // - Implement adapter for local-to-global dash::copy here - // - Return if global input range has no local sub-range + typedef typename GlobInputIt::size_type size_type; + + size_type num_elem_total = dash::distance(in_first, in_last); + if (num_elem_total <= 0) { + DASH_LOG_TRACE("dash::copy", "input range empty"); + return out_first; + } + + internal::ContiguousRangeSet range_set{in_first, in_last}; + + const auto & in_team = in_first.team(); + in_team.barrier(); + + std::vector handles; + internal::local_copy_chunks local_chunks; + + size_type num_elem_processed = 0; + + for (auto range : range_set) { + + auto cur_in_first = range.first; + auto num_copy_elem = range.second; + + DASH_ASSERT_GT(num_copy_elem, 0, + "Number of elements to copy is 0"); + + // handle local data only + if (cur_in_first.is_local()) { + auto src_ptr = cur_in_first.local(); + auto dest_ptr = out_first + num_elem_processed; + internal::copy_impl(src_ptr, + src_ptr + num_copy_elem, + dest_ptr, + &handles, + local_chunks); + } + num_elem_processed += num_copy_elem; + } + + internal::do_local_copies(local_chunks); + + if (!handles.empty()) { + DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,", + "num_handles: ", handles.size()); + dart_waitall(handles.data(), handles.size()); + } + in_team.barrier(); + + DASH_ASSERT_EQ(num_elem_processed, num_elem_total, + "Failed to find all contiguous subranges in range"); - return GlobOutputIt(); + return out_first + num_elem_total; } #endif // DOXYGEN