From a6cbb0a2de3777e9c388e25cf12e4cec4af48207 Mon Sep 17 00:00:00 2001 From: koparasy Date: Wed, 1 Nov 2023 21:23:24 -0700 Subject: [PATCH 1/7] removed default resource static member and explicitly pass the data location --- examples/main.cpp | 16 +++---- src/AMS.cpp | 11 +---- src/ml/hdcache.hpp | 88 ++++++++++++++----------------------- src/ml/surrogate.hpp | 17 ++++--- src/wf/data_handler.hpp | 34 +++++++------- src/wf/resource_manager.cpp | 59 ------------------------- src/wf/resource_manager.hpp | 38 +--------------- src/wf/workflow.hpp | 52 +++++++++------------- tests/ams_allocate.cpp | 34 ++------------ tests/cpu_packing_test.cpp | 30 ++++++------- tests/test_hdcache.cpp | 10 ++--- tests/torch_model.cpp | 7 +-- 12 files changed, 116 insertions(+), 280 deletions(-) diff --git a/examples/main.cpp b/examples/main.cpp index 90478fc1..f9f059fa 100644 --- a/examples/main.cpp +++ b/examples/main.cpp @@ -308,14 +308,14 @@ int main(int argc, char **argv) // ------------------------------------------------------------------------- // setup data allocators // ------------------------------------------------------------------------- - AMSSetupAllocator(AMSResourceType::HOST); - if (use_device) { - AMSSetupAllocator(AMSResourceType::DEVICE); - AMSSetupAllocator(AMSResourceType::PINNED); - AMSSetDefaultAllocator(AMSResourceType::DEVICE); - } else { - AMSSetDefaultAllocator(AMSResourceType::HOST); - } +// AMSSetupAllocator(AMSResourceType::HOST); +// if (use_device) { +// AMSSetupAllocator(AMSResourceType::DEVICE); +// AMSSetupAllocator(AMSResourceType::PINNED); +// AMSSetDefaultAllocator(AMSResourceType::DEVICE); +// } else { +// AMSSetDefaultAllocator(AMSResourceType::HOST); +// } // ------------------------------------------------------------------------- // setup mfem memory manager diff --git a/src/AMS.cpp b/src/AMS.cpp index de7ad6b4..24e623fb 100644 --- a/src/AMS.cpp +++ b/src/AMS.cpp @@ -81,7 +81,7 @@ AMSExecutor AMSCreateExecutor(const AMSConfig config) config.SPath, config.DBPath, config.dbType, - config.device == AMSResourceType::HOST, + AMSResourceType::HOST, config.threshold, config.uqPolicy, config.nClusters, @@ -98,7 +98,7 @@ AMSExecutor AMSCreateExecutor(const AMSConfig config) config.SPath, config.DBPath, config.dbType, - config.device == AMSResourceType::HOST, + AMSResourceType::HOST, static_cast(config.threshold), config.uqPolicy, config.nClusters, @@ -171,18 +171,11 @@ void AMSSetupAllocator(const AMSResourceType Resource) ams::ResourceManager::setup(Resource); } -void AMSResourceInfo() { ams::ResourceManager::list_allocators(); } - int AMSGetLocationId(void *ptr) { return ams::ResourceManager::getDataAllocationId(ptr); } -void AMSSetDefaultAllocator(const AMSResourceType device) -{ - ams::ResourceManager::setDefaultDataAllocator(device); -} - #ifdef __cplusplus } #endif diff --git a/src/ml/hdcache.hpp b/src/ml/hdcache.hpp index 177c81ed..a3d84c06 100644 --- a/src/ml/hdcache.hpp +++ b/src/ml/hdcache.hpp @@ -68,11 +68,10 @@ class HDCache const uint8_t m_dim; const bool m_use_random; - const bool m_use_device; const int m_knbrs = 0; const AMSUQPolicy m_policy = AMSUQPolicy::FAISSMean; - AMSResourceType defaultRes; + AMSResourceType cache_location; const TypeValue acceptable_error; @@ -97,22 +96,20 @@ class HDCache //! ------------------------------------------------------------------------ //! constructors //! 
------------------------------------------------------------------------
-  HDCache(bool use_device, TypeInValue threshold = 0.5)
+  HDCache(AMSResourceType resource, TypeInValue threshold = 0.5)
       : m_index(nullptr),
         m_dim(0),
         m_use_random(true),
         m_knbrs(-1),
-        m_use_device(use_device),
+        cache_location(resource),
         acceptable_error(threshold)
   {
-    defaultRes =
-        (m_use_device) ? AMSResourceType::DEVICE : AMSResourceType::HOST;
     print();
   }
 
 #ifdef __ENABLE_FAISS__
   HDCache(const std::string &cache_path,
-          bool use_device,
+          AMSResourceType resource,
           const AMSUQPolicy uqPolicy,
           int knbrs,
           TypeInValue threshold = 0.5)
@@ -121,14 +118,12 @@ class HDCache
         m_use_random(false),
         m_knbrs(knbrs),
         m_policy(uqPolicy),
-        m_use_device(use_device),
+        cache_location(resource),
         acceptable_error(threshold)
   {
-    defaultRes =
-        (m_use_device) ? AMSResourceType::DEVICE : AMSResourceType::HOST;
 #ifdef __ENABLE_CUDA__
     // Copy index to device side
-    if (use_device) {
+    if (cache_location == AMSResourceType::DEVICE) {
       faiss::gpu::GpuClonerOptions copyOptions;
       faiss::gpu::ToGpuCloner cloner(&res, 0, copyOptions);
       m_index = cloner.clone_Index(m_index);
@@ -138,7 +133,7 @@ class HDCache
   }
 #else  // Disabled FAISS
   HDCache(const std::string &cache_path,
-          bool use_device,
+          AMSResourceType resource,
           const AMSUQPolicy uqPolicy,
           int knbrs,
           TypeInValue threshold = 0.5)
@@ -147,11 +143,9 @@ class HDCache
         m_use_random(false),
         m_knbrs(knbrs),
         m_policy(uqPolicy),
-        m_use_device(use_device),
+        cache_location(resource),
         acceptable_error(threshold)
   {
-    defaultRes =
-        (m_use_device) ? AMSResourceType::DEVICE : AMSResourceType::HOST;
     WARNING(UQModule, "Ignoring cache path because FAISS is not available")
     print();
   }
@@ -160,7 +154,7 @@ class HDCache
 public:
   static std::shared_ptr<HDCache<TypeInValue>> find_cache(
       const std::string &cache_path,
-      bool use_device,
+      AMSResourceType resource,
       const AMSUQPolicy uqPolicy,
       int knbrs,
       TypeInValue threshold = 0.5)
@@ -170,7 +164,7 @@ class HDCache
     if (model != instances.end()) {
       // Model Found
       auto cache = model->second;
-      if (use_device != cache->m_use_device)
+      if (resource != cache->cache_location)
         throw std::runtime_error(
             "Currently we do not support loading the same index on different "
             "devices.");
@@ -201,7 +195,7 @@ class HDCache
 
   static std::shared_ptr<HDCache<TypeInValue>> getInstance(
       const std::string &cache_path,
-      bool use_device,
+      AMSResourceType resource,
       const AMSUQPolicy uqPolicy,
       int knbrs,
       TypeInValue threshold = 0.5)
   {
     // Cache does not exist.
We need to create one // std::shared_ptr> cache = - find_cache(cache_path, use_device, uqPolicy, knbrs, threshold); + find_cache(cache_path, resource, uqPolicy, knbrs, threshold); if (cache) { DBG(UQModule, "Returning existing cache under (%s)", cache_path.c_str()) return cache; @@ -219,19 +213,19 @@ class HDCache DBG(UQModule, "Generating new cache under (%s)", cache_path.c_str()) std::shared_ptr> new_cache = std::shared_ptr>(new HDCache( - cache_path, use_device, uqPolicy, knbrs, threshold)); + cache_path, resource, uqPolicy, knbrs, threshold)); instances.insert(std::make_pair(cache_path, new_cache)); return new_cache; } static std::shared_ptr> getInstance( - bool use_device, + AMSResourceType resource, float threshold = 0.5) { static std::string random_path("random"); std::shared_ptr> cache = find_cache( - random_path, use_device, AMSUQPolicy::FAISSMean, -1, threshold); + random_path, resource, AMSUQPolicy::FAISSMean, -1, threshold); if (cache) { DBG(UQModule, "Returning existing cache under (%s)", random_path.c_str()) return cache; @@ -243,7 +237,7 @@ class HDCache threshold) std::shared_ptr> new_cache = std::shared_ptr>( - new HDCache(use_device, threshold)); + new HDCache(resource, threshold)); instances.insert(std::make_pair(random_path, new_cache)); return new_cache; @@ -272,7 +266,7 @@ class HDCache } DBG(UQModule, "HDCache (on_device = %d random = %d %s)", - m_use_device, + cache_location, m_use_random, info.c_str()); } @@ -349,9 +343,9 @@ class HDCache !has_index(), "HDCache does not have a valid and trained index!") - TypeValue *lin_data = data_handler::linearize_features(ndata, inputs); + TypeValue *lin_data = data_handler::linearize_features(cache_location, ndata, inputs); _add(ndata, lin_data); - ams::ResourceManager::deallocate(lin_data, defaultRes); + ams::ResourceManager::deallocate(lin_data, cache_location); } //! ----------------------------------------------------------------------- @@ -377,9 +371,9 @@ class HDCache void train(const size_t ndata, const std::vector &inputs) { if (m_use_random) return; - TypeValue *lin_data = data_handler::linearize_features(ndata, inputs); + TypeValue *lin_data = data_handler::linearize_features(cache_location, ndata, inputs); _train(ndata, lin_data); - ams::ResourceManager::deallocate(lin_data, defaultRes); + ams::ResourceManager::deallocate(lin_data, cache_location); } //! 
------------------------------------------------------------------------ @@ -439,9 +433,9 @@ class HDCache if (m_use_random) { _evaluate(ndata, is_acceptable); } else { - TypeValue *lin_data = data_handler::linearize_features(ndata, inputs); + TypeValue *lin_data = data_handler::linearize_features(cache_location, ndata, inputs); _evaluate(ndata, lin_data, is_acceptable); - ams::ResourceManager::deallocate(lin_data, defaultRes); + ams::ResourceManager::deallocate(lin_data, cache_location); } DBG(UQModule, "Done with evalution of uq"); } @@ -469,7 +463,7 @@ class HDCache PERFFASPECT() inline void _add(const size_t ndata, const T *data) { - TypeValue *vdata = data_handler::cast_to_typevalue(ndata, data); + TypeValue *vdata = data_handler::cast_to_typevalue(cache_location, ndata, data); _add(ndata, vdata); delete[] vdata; } @@ -501,7 +495,7 @@ class HDCache PERFFASPECT() inline void _train(const size_t ndata, const T *data) { - TypeValue *vdata = data_handler::cast_to_typevalue(ndata, data); + TypeValue *vdata = data_handler::cast_to_typevalue(cache_location, ndata, data); _train(ndata, vdata); delete[] vdata; } @@ -517,23 +511,10 @@ class HDCache const size_t knbrs = static_cast(m_knbrs); static const TypeValue ook = 1.0 / TypeValue(knbrs); - const bool input_on_device = ams::ResourceManager::is_on_device(data); - const bool output_on_device = - ams::ResourceManager::is_on_device(is_acceptable); - - if (input_on_device != output_on_device) { - WARNING(UQModule, - "Input is ( on_device: %d)" - " Output is ( on_device: %d)" - " on different devices", - input_on_device, - output_on_device) - } - TypeValue *kdists = - ams::ResourceManager::allocate(ndata * knbrs, defaultRes); + ams::ResourceManager::allocate(ndata * knbrs, cache_location); TypeIndex *kidxs = - ams::ResourceManager::allocate(ndata * knbrs, defaultRes); + ams::ResourceManager::allocate(ndata * knbrs, cache_location); // query faiss // TODO: This is a HACK. When searching more than 65535 @@ -554,7 +535,7 @@ class HDCache #endif // compute means - if (defaultRes == AMSResourceType::HOST) { + if (cache_location== AMSResourceType::HOST) { for (size_t i = 0; i < ndata; ++i) { CFATAL(UQModule, m_policy == AMSUQPolicy::DeltaUQ, @@ -583,8 +564,8 @@ class HDCache kdists, is_acceptable, ndata, knbrs, acceptable_error); } - ams::ResourceManager::deallocate(kdists, defaultRes); - ams::ResourceManager::deallocate(kidxs, defaultRes); + ams::ResourceManager::deallocate(kdists, cache_location); + ams::ResourceManager::deallocate(kidxs, cache_location); } //! 
evaluate cache uncertainty when (data type != TypeValue) @@ -592,7 +573,7 @@ class HDCache std::enable_if_t::value> * = nullptr> inline void _evaluate(const size_t ndata, T *data, bool *is_acceptable) const { - TypeValue *vdata = data_handler::cast_to_typevalue(ndata, data); + TypeValue *vdata = data_handler::cast_to_typevalue(cache_location, ndata, data); _evaluate(ndata, data, is_acceptable); delete[] vdata; } @@ -624,10 +605,7 @@ class HDCache PERFFASPECT() inline void _evaluate(const size_t ndata, bool *is_acceptable) const { - const bool data_on_device = - ams::ResourceManager::is_on_device(is_acceptable); - - if (data_on_device) { + if ( cache_location == AMSResourceType::HOST ) { #ifdef __ENABLE_CUDA__ random_uq_Device<<<1, 1>>>(is_acceptable, ndata, acceptable_error); #endif diff --git a/src/ml/surrogate.hpp b/src/ml/surrogate.hpp index 16108592..240915fa 100644 --- a/src/ml/surrogate.hpp +++ b/src/ml/surrogate.hpp @@ -38,7 +38,7 @@ class SurrogateModel private: const std::string model_path; - const bool _is_cpu; + AMSResourceType model_resource; #ifdef __ENABLE_TORCH__ @@ -91,7 +91,7 @@ class SurrogateModel // Transpose to get continuous memory and // perform single memcpy. tensor = tensor.transpose(1, 0); - if (_is_cpu) { + if (model_resource == AMSResourceType::HOST) { for (long j = 0; j < numCols; j++) { auto tmp = tensor[j].contiguous(); TypeInValue* ptr = tmp.data_ptr(); @@ -190,11 +190,11 @@ class SurrogateModel #endif - SurrogateModel(const char* model_path, bool is_cpu = true) - : model_path(model_path), _is_cpu(is_cpu) + SurrogateModel(const char* model_path, AMSResourceType resource = AMSResourceType::HOST) + : model_path(model_path), model_resource(resource) { - if (_is_cpu) + if (resource != AMSResourceType::DEVICE) _load(model_path, "cpu"); else _load(model_path, "cuda"); @@ -226,14 +226,14 @@ class SurrogateModel static std::shared_ptr> getInstance( const char* model_path, - bool is_cpu = true) + AMSResourceType resource = AMSResourceType::HOST) { auto model = SurrogateModel::instances.find(std::string(model_path)); if (model != instances.end()) { // Model Found auto torch_model = model->second; - if (is_cpu != torch_model->is_cpu()) + if ( resource != torch_model->model_resource) throw std::runtime_error( "Currently we are not supporting loading the same model file on " "different devices."); @@ -252,7 +252,7 @@ class SurrogateModel DBG(Surrogate, "Generating new model under (%s)", model_path); std::shared_ptr> torch_model = std::shared_ptr>( - new SurrogateModel(model_path, is_cpu)); + new SurrogateModel(model_path, resource)); instances.insert(std::make_pair(std::string(model_path), torch_model)); return torch_model; }; @@ -295,7 +295,6 @@ class SurrogateModel } #endif - bool is_cpu() { return _is_cpu; } }; template diff --git a/src/wf/data_handler.hpp b/src/wf/data_handler.hpp index 98d52394..d7c61f71 100644 --- a/src/wf/data_handler.hpp +++ b/src/wf/data_handler.hpp @@ -44,7 +44,7 @@ class DataHandler template < class TypeInValue, std::enable_if_t::value>* = nullptr> - static inline TypeValue* cast_to_typevalue(const size_t n, TypeInValue* data) + static inline TypeValue* cast_to_typevalue(AMSResourceType resource, const size_t n, TypeInValue* data) { return data; } @@ -66,9 +66,9 @@ class DataHandler template < typename TypeInValue, std::enable_if_t::value>* = nullptr> - static inline TypeValue* cast_to_typevalue(const size_t n, TypeInValue* data) + static inline TypeValue* cast_to_typevalue(AMSResourceType resource, const size_t n, TypeInValue* data) { - 
TypeValue* fdata = ams::ResourceManager::allocate(n); + TypeValue* fdata = ams::ResourceManager::allocate(resource, n); std::transform(data, data + n, fdata, [&](const TypeInValue& v) { return static_cast(v); }); @@ -135,6 +135,7 @@ class DataHandler template PERFFASPECT() static inline TypeValue* linearize_features( + AMSResourceType resource, const size_t n, const std::vector& features) { @@ -142,12 +143,9 @@ PERFFASPECT() const size_t nfeatures = features.size(); const size_t nvalues = n * nfeatures; - TypeValue* data = ams::ResourceManager::allocate(nvalues); + TypeValue* data = ams::ResourceManager::allocate(nvalues, resource); - const bool features_on_device = - ams::ResourceManager::is_on_device(features[0]); - - if (!features_on_device) { + if (resource == AMSResourceType::HOST) { for (size_t d = 0; d < nfeatures; d++) { for (size_t i = 0; i < n; i++) { data[i * nfeatures + d] = static_cast(features[d][i]); @@ -163,6 +161,7 @@ PERFFASPECT() * vector in the dense vector if the respective index * of the predicate vector is equal to 'denseVal. * + * @param[in] dataLocation Location of the data * @param[in] predicate A boolean vector storing which elements in the vector * should be dropped. * @param[in] n The number of elements of the C-vectors. @@ -174,7 +173,7 @@ PERFFASPECT() * @return Total number of elements stored in the dense vector * */ PERFFASPECT() - static inline size_t pack(const bool* predicate, + static inline size_t pack(AMSResourceType dataLocation, const bool* predicate, const size_t n, std::vector& sparse, std::vector& dense, @@ -186,7 +185,7 @@ PERFFASPECT() size_t npacked = 0; size_t dims = sparse.size(); - if (!ams::ResourceManager::isDeviceExecution()) { + if (dataLocation != AMSResourceType::DEVICE) { for (size_t i = 0; i < n; i++) { if (predicate[i] == denseVal) { for (size_t j = 0; j < dims; j++) @@ -208,6 +207,7 @@ PERFFASPECT() /* @brief The function stores all elements from the dense * vector to the sparse vector. * + * @param[in] dataLocation Location of the data * @param[in] predicate A boolean vector storing which elements in the vector * should be kept. * @param[in] n The number of elements of the C-vectors. @@ -219,7 +219,7 @@ PERFFASPECT() * to be copied to the sparse vectors. * */ PERFFASPECT() - static inline void unpack(const bool* predicate, + static inline void unpack(AMSResourceType dataLocation, const bool* predicate, const size_t n, std::vector& dense, std::vector& sparse, @@ -231,7 +231,7 @@ PERFFASPECT() size_t npacked = 0; size_t dims = sparse.size(); - if (!ams::ResourceManager::isDeviceExecution()) { + if (dataLocation != AMSResourceType::DEVICE) { for (size_t i = 0; i < n; i++) { if (predicate[i] == denseVal) { for (size_t j = 0; j < dims; j++) @@ -254,6 +254,7 @@ PERFFASPECT() * vector in the dense vector if the respective index * of the predicate vector is equal to 'denseVal. * + * @param[in] dataLocation Location of the data * @param[in] predicate A boolean vector storing which elements in the vector * @param[out] sparse_indices A vector storing the mapping from dense elements * to sparse elements. 
@@ -266,7 +267,7 @@ PERFFASPECT() * @return Total number of elements stored in the dense vector * */ PERFFASPECT() - static inline size_t pack(const bool* predicate, + static inline size_t pack(AMSResourceType dataLocation, const bool* predicate, int* sparse_indices, const size_t n, std::vector& sparse, @@ -280,7 +281,7 @@ PERFFASPECT() size_t npacked = 0; int dims = sparse.size(); - if (!ams::ResourceManager::isDeviceExecution()) { + if (dataLocation != AMSResourceType::DEVICE) { for (size_t i = 0; i < n; i++) { if (predicate[i] == denseVal) { for (size_t j = 0; j < dims; j++) @@ -304,6 +305,7 @@ PERFFASPECT() /* @brief The function copies all elements from the dense * vector to the sparse vector. * + * @param[in] dataLocation Location of the data * @param[in] sparse_indices A vector storing the mapping from sparse to * dense. * @param[in] n The number of elements of the C-vectors. @@ -315,7 +317,7 @@ PERFFASPECT() * to be copied to the sparse vectors. * */ PERFFASPECT() - static inline void unpack(int* sparse_indices, + static inline void unpack(AMSResourceType dataLocation, int* sparse_indices, const size_t nPacked, std::vector& dense, std::vector& sparse, @@ -327,7 +329,7 @@ PERFFASPECT() int dims = sparse.size(); - if (!ams::ResourceManager::isDeviceExecution()) { + if (dataLocation != AMSResourceType::DEVICE) { for (size_t i = 0; i < nPacked; i++) for (size_t j = 0; j < dims; j++) sparse[j][sparse_indices[i]] = dense[j][i]; diff --git a/src/wf/resource_manager.cpp b/src/wf/resource_manager.cpp index 3d79c2f9..370dcea6 100644 --- a/src/wf/resource_manager.cpp +++ b/src/wf/resource_manager.cpp @@ -52,65 +52,6 @@ std::string ResourceManager::allocator_names[AMSResourceType::RSEND] = {"HOST", "DEVICE", "PINNED"}; -// default allocator -AMSResourceType ResourceManager::default_resource = AMSResourceType::HOST; - -//! -------------------------------------------------------------------------- -void ResourceManager::setDefaultDataAllocator(AMSResourceType location) -{ - ResourceManager::default_resource = location; - - auto& rm = umpire::ResourceManager::getInstance(); - auto alloc = rm.getAllocator(allocator_ids[location]); - - DBG(ResourceManager, - "Setting Default Allocator: %d : %s", - alloc.getId(), - alloc.getName().c_str()); - - rm.setDefaultAllocator(alloc); -} - -AMSResourceType ResourceManager::getDefaultDataAllocator() -{ - return ResourceManager::default_resource; -} - -bool ResourceManager::isDeviceExecution() -{ - return ResourceManager::default_resource == AMSResourceType::DEVICE; -} - - -// ----------------------------------------------------------------------------- -// get the list of available allocators -// ----------------------------------------------------------------------------- -void ResourceManager::list_allocators() -{ - - auto& rm = umpire::ResourceManager::getInstance(); - auto alloc_names = rm.getAllocatorNames(); - auto alloc_ids = rm.getAllocatorIds(); - - std::cout << " > Listing data allocators registered with " - "ams::ResourceManager\n"; - for (int i = 0; i < std::max(alloc_ids.size(), alloc_names.size()); i++) { - - if (i < alloc_ids.size() && i < alloc_names.size()) { - std::cout << " [id = " << alloc_ids[i] - << "] name = " << alloc_names[i] << "\n"; - } else if (i < alloc_names.size()) { // id not available - std::cout << " [id = ?] 
name = " << alloc_names[i] << "\n"; - } else { // name not available - std::cout << " [id = " << alloc_ids[i] << "] name = ?\n"; - } - } - - auto dalloc = rm.getDefaultAllocator(); - std::cout << " > Default allocator = (" << dalloc.getId() << " : " - << dalloc.getName() << ")\n"; -} - // ----------------------------------------------------------------------------- // set up the resource manager diff --git a/src/wf/resource_manager.hpp b/src/wf/resource_manager.hpp index fc2162b5..2e53ad5a 100644 --- a/src/wf/resource_manager.hpp +++ b/src/wf/resource_manager.hpp @@ -28,9 +28,6 @@ class ResourceManager { public: private: - /** @brief Used internally to allocate from the user define default resource (Device, Host Memory) */ - static AMSResourceType default_resource; - /** @brief Used internally to map resource types (Device, host, pinned memory) to * umpire allocator ids. */ static int allocator_ids[AMSResourceType::RSEND]; @@ -60,21 +57,6 @@ class ResourceManager /** @brief setup allocators in the resource manager */ static void setup(const AMSResourceType resource); - /** @brief Print out all available allocators */ - static void list_allocators(); - - /** @brief Get the default memory allocator */ - static AMSResourceType getDefaultDataAllocator(); - - /** @brief Set the default memory allocator */ - static void setDefaultDataAllocator(AMSResourceType resource); - - /** @brief Check if default allocator is set to Device - * @pre The library currently assumes the default memory allocator - * also describes the executing device. - */ - static bool isDeviceExecution(); - /** @brief Check if pointer is allocatd through * @tparam TypeInValue The type of pointer being tested. * @param[in] data pointer to memory. @@ -116,19 +98,6 @@ class ResourceManager : "unknown"; } - /** @brief checks whether the data are resident on the device. - * @tparam TypeInValue The type of pointer being tested. - * @param[in] data pointer to memory. - * @return True when data are on Device. - */ - template - static bool is_on_device(const TypeInValue* data) - { - auto alloc_id = getDataAllocationId(data); - return ResourceManager::isDeviceExecution() && alloc_id != -1 && - alloc_id == allocator_ids[AMSResourceType::DEVICE]; - } - /** @brief Allocates nvalues on the specified device. * @tparam TypeInValue The type of pointer to allocate. * @param[in] nvalues Number of elements to allocate. @@ -137,8 +106,7 @@ class ResourceManager */ template PERFFASPECT() - static TypeInValue* allocate(size_t nvalues, - AMSResourceType dev = default_resource) + static TypeInValue* allocate(size_t nvalues, AMSResourceType dev) { static auto& rm = umpire::ResourceManager::getInstance(); DBG(ResourceManager, @@ -179,9 +147,7 @@ class ResourceManager * @return void. 
*/ PERFFASPECT() - static void registerExternal(void* ptr, - size_t nBytes, - AMSResourceType dev = default_resource) + static void registerExternal(void* ptr, size_t nBytes, AMSResourceType dev) { auto& rm = umpire::ResourceManager::getInstance(); auto alloc = rm.getAllocator(allocator_ids[dev]); diff --git a/src/wf/workflow.hpp b/src/wf/workflow.hpp index bf69ab52..4b0c7b8f 100644 --- a/src/wf/workflow.hpp +++ b/src/wf/workflow.hpp @@ -18,6 +18,7 @@ #include "AMS.h" #include "ml/hdcache.hpp" #include "ml/surrogate.hpp" +#include "resource_manager.hpp" #include "wf/basedb.hpp" #ifdef __ENABLE_MPI__ @@ -75,11 +76,8 @@ class AMSWorkflow * (world_size for MPI) */ int wSize; - /** @brief Whether data and simulation takes place on CPU or GPU*/ - bool isCPU; - - /** @brief Location of data during simulation (CPU or GPU) */ - AMSResourceType mLoc; + /** @brief Location of the original application data (CPU or GPU) */ + AMSResourceType appDataLoc; /** @brief execution policy of the distributed system. Load balance or not. */ const AMSExecPolicy ePolicy; @@ -111,7 +109,7 @@ class AMSWorkflow std::vector hInputs, hOutputs; - if (isCPU) return DB->store(num_elements, inputs, outputs); + if (appDataLoc == AMSResourceType::HOST ) return DB->store(num_elements, inputs, outputs); // Compute number of elements that fit inside the buffer size_t bElements = bSize / sizeof(FPTypeValue); @@ -161,13 +159,9 @@ class AMSWorkflow surrogate(nullptr), DB(nullptr), dbType(AMSDBType::None), - isCPU(false), - mLoc(AMSResourceType::DEVICE), + appDataLoc(AMSResourceType::HOST), ePolicy(AMSExecPolicy::UBALANCED) { - if (isCPU) { - mLoc = AMSResourceType::HOST; - } #ifdef __ENABLE_DB__ DB = createDB("miniApp_data.txt", dbType, 0); @@ -180,7 +174,7 @@ class AMSWorkflow char *surrogate_path, char *db_path, const AMSDBType dbType, - bool is_cpu, + AMSResourceType appDataLoc, FPTypeValue threshold, const AMSUQPolicy uqPolicy, const int nClusters, @@ -191,27 +185,23 @@ class AMSWorkflow dbType(dbType), rId(_pId), wSize(_wSize), - isCPU(is_cpu), - mLoc(AMSResourceType::DEVICE), + appDataLoc(appDataLoc), ePolicy(policy) { - if (isCPU) { - mLoc = AMSResourceType::HOST; - } surrogate = nullptr; if (surrogate_path) surrogate = - SurrogateModel::getInstance(surrogate_path, is_cpu); + SurrogateModel::getInstance(surrogate_path, appDataLoc); // TODO: Fix magic number. 10 represents the number of neighbours I am // looking at. 
if (uq_path) hdcache = HDCache::getInstance( - uq_path, !is_cpu, uqPolicy, nClusters, threshold); + uq_path, appDataLoc, uqPolicy, nClusters, threshold); else // This is a random hdcache returning true %threshold queries - hdcache = HDCache::getInstance(!is_cpu, threshold); + hdcache = HDCache::getInstance(appDataLoc, threshold); DB = nullptr; if (db_path) { @@ -317,7 +307,7 @@ class AMSWorkflow return; } // The predicate with which we will split the data on a later step - bool *p_ml_acceptable = ams::ResourceManager::allocate(totalElements); + bool *p_ml_acceptable = ams::ResourceManager::allocate(totalElements, appDataLoc); // ------------------------------------------------------------- // STEP 1: call the hdcache to look at input uncertainties @@ -337,7 +327,7 @@ class AMSWorkflow for (int i = 0; i < inputDim; i++) { packedInputs.emplace_back( - ams::ResourceManager::allocate(totalElements)); + ams::ResourceManager::allocate(totalElements, appDataLoc)); } DBG(Workflow, "Allocated input resources") @@ -357,14 +347,14 @@ class AMSWorkflow // ----------------------------------------------------------------- // ---- 3a: we need to pack the sparse data based on the uq flag const long packedElements = - data_handler::pack(predicate, totalElements, origInputs, packedInputs); + data_handler::pack(appDataLoc, predicate, totalElements, origInputs, packedInputs); // Pointer values which store output data values // to be computed using the eos function. std::vector packedOutputs; for (int i = 0; i < outputDim; i++) { packedOutputs.emplace_back( - ams::ResourceManager::allocate(packedElements)); + ams::ResourceManager::allocate(packedElements, appDataLoc)); } { @@ -375,9 +365,9 @@ class AMSWorkflow #ifdef __ENABLE_MPI__ CALIPER(CALI_MARK_BEGIN("LOAD BALANCE MODULE");) AMSLoadBalancer lBalancer( - rId, wSize, packedElements, Comm, inputDim, outputDim, mLoc); + rId, wSize, packedElements, Comm, inputDim, outputDim, appDataLoc); if (ePolicy == AMSExecPolicy::BALANCED && Comm) { - lBalancer.scatterInputs(packedInputs, mLoc); + lBalancer.scatterInputs(packedInputs, appDataLoc); iPtr = reinterpret_cast(lBalancer.inputs()); oPtr = reinterpret_cast(lBalancer.outputs()); lbElements = lBalancer.getBalancedSize(); @@ -395,14 +385,14 @@ class AMSWorkflow #ifdef __ENABLE_MPI__ CALIPER(CALI_MARK_BEGIN("LOAD BALANCE MODULE");) if (ePolicy == AMSExecPolicy::BALANCED && Comm) { - lBalancer.gatherOutputs(packedOutputs, mLoc); + lBalancer.gatherOutputs(packedOutputs, appDataLoc); } CALIPER(CALI_MARK_END("LOAD BALANCE MODULE");) #endif } // ---- 3c: unpack the data - data_handler::unpack(predicate, totalElements, packedOutputs, origOutputs); + data_handler::unpack(appDataLoc, predicate, totalElements, packedOutputs, origOutputs); DBG(Workflow, "Finished physics evaluation") @@ -419,11 +409,11 @@ class AMSWorkflow // Deallocate temporal data // ----------------------------------------------------------------- for (int i = 0; i < inputDim; i++) - ams::ResourceManager::deallocate(packedInputs[i], mLoc); + ams::ResourceManager::deallocate(packedInputs[i], appDataLoc); for (int i = 0; i < outputDim; i++) - ams::ResourceManager::deallocate(packedOutputs[i], mLoc); + ams::ResourceManager::deallocate(packedOutputs[i], appDataLoc); - ams::ResourceManager::deallocate(p_ml_acceptable, mLoc); + ams::ResourceManager::deallocate(p_ml_acceptable, appDataLoc); DBG(Workflow, "Finished AMSExecution") CINFO(Workflow, diff --git a/tests/ams_allocate.cpp b/tests/ams_allocate.cpp index ea9f3f55..58c93185 100644 --- a/tests/ams_allocate.cpp 
+++ b/tests/ams_allocate.cpp @@ -18,11 +18,7 @@ int main(int argc, char* argv[]) auto& rm = umpire::ResourceManager::getInstance(); int device = std::atoi(argv[1]); - AMSSetupAllocator(AMSResourceType::HOST); - if (device == 1) { - AMSSetupAllocator(AMSResourceType::DEVICE); - AMSSetDefaultAllocator(AMSResourceType::DEVICE); std::cout << "Starting allocation[Done]\n"; double* data = ams::ResourceManager::allocate(1, AMSResourceType::DEVICE); @@ -36,19 +32,6 @@ int main(int argc, char* argv[]) } std::cout << "Explicit device allocation[Done]\n"; - ams::ResourceManager::deallocate(data, AMSResourceType::DEVICE); - std::cout << "Explicit device de-allocation[Done]\n"; - - ams::ResourceManager::setDefaultDataAllocator(AMSResourceType::DEVICE); - - if (ams::ResourceManager::getDefaultDataAllocator() != - AMSResourceType::DEVICE) { - std::cout << "Default allocator not set correctly\n"; - return 2; - } - std::cout << "Set default allocator to device[Done]\n"; - - data = ams::ResourceManager::allocate(1); found_allocator = rm.getAllocator(data); if (strcmp(ams::ResourceManager::getDeviceAllocatorName(), @@ -58,10 +41,10 @@ int main(int argc, char* argv[]) << "Actual Allocation " << found_allocator.getName() << "\n"; return 3; } - std::cout << "Implicit device allocation [Done]\n"; + + ams::ResourceManager::deallocate(data, AMSResourceType::DEVICE); + std::cout << "Explicit device de-allocation[Done]\n"; } else if (device == 0) { - AMSSetDefaultAllocator(AMSResourceType::HOST); - std::cout << "Starting allocation[Done]\n"; double* data = ams::ResourceManager::allocate(1, AMSResourceType::HOST); auto found_allocator = rm.getAllocator(data); @@ -77,16 +60,6 @@ int main(int argc, char* argv[]) ams::ResourceManager::deallocate(data, AMSResourceType::HOST); std::cout << "Explicit device de-allocation[Done]\n"; - ams::ResourceManager::setDefaultDataAllocator(AMSResourceType::HOST); - - if (ams::ResourceManager::getDefaultDataAllocator() != - AMSResourceType::HOST) { - std::cout << "Default allocator not set correctly\n"; - return 2; - } - std::cout << "Set default allocator to device[Done]\n"; - - data = ams::ResourceManager::allocate(1); found_allocator = rm.getAllocator(data); if (strcmp(ams::ResourceManager::getHostAllocatorName(), @@ -96,6 +69,7 @@ int main(int argc, char* argv[]) << "Actual Allocation " << found_allocator.getName() << "\n"; return 3; } + data = ams::ResourceManager::allocate(1, AMSResourceType::HOST); std::cout << "Implicit device allocation [Done]\n"; } } diff --git a/tests/cpu_packing_test.cpp b/tests/cpu_packing_test.cpp index d2203f55..8fb70181 100644 --- a/tests/cpu_packing_test.cpp +++ b/tests/cpu_packing_test.cpp @@ -53,10 +53,11 @@ int main(int argc, char* argv[]) int device = std::atoi(argv[1]); AMSSetupAllocator(AMSResourceType::HOST); if (device == 0) { - bool* predicate = ams::ResourceManager::allocate(SIZE); - double* dense = ams::ResourceManager::allocate(SIZE); - double* sparse = ams::ResourceManager::allocate(SIZE); - double* rsparse = ams::ResourceManager::allocate(SIZE); + AMSResourceType resource = AMSResourceType::HOST; + bool* predicate = ams::ResourceManager::allocate(SIZE, resource); + double* dense = ams::ResourceManager::allocate(SIZE, resource); + double* sparse = ams::ResourceManager::allocate(SIZE, resource); + double* rsparse = ams::ResourceManager::allocate(SIZE, resource); initPredicate(predicate, sparse, SIZE); std::vector s_data({const_cast(sparse)}); @@ -65,7 +66,7 @@ int main(int argc, char* argv[]) int elements; for (int flag = 0; flag < 2; 
flag++) { - elements = data_handler::pack(predicate, size, s_data, d_data, flag); + elements = data_handler::pack(resource, predicate, size, s_data, d_data, flag); if (elements != (SIZE + flag) / 2) { std::cout << "Did not compute dense number correctly " << elements @@ -78,7 +79,7 @@ int main(int argc, char* argv[]) return 1; } - data_handler::unpack(predicate, size, d_data, sr_data, flag); + data_handler::unpack(resource, predicate, size, d_data, sr_data, flag); if (verify(predicate, sparse, rsparse, size, flag)) { std::cout << "Unpacking packed data does not match initial values\n"; @@ -91,8 +92,7 @@ int main(int argc, char* argv[]) ResourceManager::deallocate(sparse, AMSResourceType::HOST); ResourceManager::deallocate(rsparse, AMSResourceType::HOST); } else if (device == 1) { - AMSSetupAllocator(AMSResourceType::DEVICE); - AMSSetDefaultAllocator(AMSResourceType::DEVICE); + AMSResourceType resource = AMSResourceType::DEVICE; bool* h_predicate = ams::ResourceManager::allocate(SIZE, AMSResourceType::HOST); double* h_dense = @@ -104,11 +104,11 @@ int main(int argc, char* argv[]) initPredicate(h_predicate, h_sparse, SIZE); - bool* predicate = ams::ResourceManager::allocate(SIZE); - double* dense = ams::ResourceManager::allocate(SIZE); - double* sparse = ams::ResourceManager::allocate(SIZE); - double* rsparse = ams::ResourceManager::allocate(SIZE); - int* reindex = ams::ResourceManager::allocate(SIZE); + bool* predicate = ams::ResourceManager::allocate(SIZE, resource); + double* dense = ams::ResourceManager::allocate(SIZE, resource); + double* sparse = ams::ResourceManager::allocate(SIZE, resource); + double* rsparse = ams::ResourceManager::allocate(SIZE, resource); + int* reindex = ams::ResourceManager::allocate(SIZE, resource); ResourceManager::copy(h_predicate, predicate); ResourceManager::copy(h_sparse, sparse); @@ -119,7 +119,7 @@ int main(int argc, char* argv[]) for (int flag = 0; flag < 2; flag++) { int elements; - elements = data_handler::pack(predicate, size, s_data, d_data, flag); + elements = data_handler::pack(resource, predicate, size, s_data, d_data, flag); if (elements != (SIZE + flag) / 2) { std::cout << "Did not compute dense number correctly(" << elements @@ -134,7 +134,7 @@ int main(int argc, char* argv[]) return 1; } - data_handler::unpack(predicate, size, d_data, sr_data, flag); + data_handler::unpack(resource, predicate, size, d_data, sr_data, flag); ams::ResourceManager::copy(rsparse, h_rsparse); diff --git a/tests/test_hdcache.cpp b/tests/test_hdcache.cpp index b26de8fe..7ad9ece5 100644 --- a/tests/test_hdcache.cpp +++ b/tests/test_hdcache.cpp @@ -153,24 +153,20 @@ int main(int argc, char *argv[]) int nDims = std::atoi(argv[7]); int nElements = std::atoi(argv[8]); - AMSSetupAllocator(AMSResourceType::HOST); AMSResourceType resource = AMSResourceType::HOST; - if (use_device == 1) { - AMSSetupAllocator(AMSResourceType::DEVICE); - AMSSetDefaultAllocator(AMSResourceType::DEVICE); + if (use_device == 1) resource = AMSResourceType::DEVICE; - } if (std::strcmp("double", data_type) == 0) { std::shared_ptr> cache = HDCache::getInstance( - faiss_path, use_device, uq_policy, 10, threshold); + faiss_path, resource, uq_policy, 10, threshold); bool result = do_faiss(cache, resource, nClusters, nDims, nElements, threshold); cache.reset(); return !result; } else if (std::strcmp("single", data_type) == 0) { std::shared_ptr> cache = HDCache::getInstance( - faiss_path, use_device, uq_policy, 10, threshold); + faiss_path, resource, uq_policy, 10, threshold); bool result = do_faiss(cache, 
resource, nClusters, nDims, nElements, threshold); cache.reset(); diff --git a/tests/torch_model.cpp b/tests/torch_model.cpp index 074800dc..55ce7482 100644 --- a/tests/torch_model.cpp +++ b/tests/torch_model.cpp @@ -50,22 +50,19 @@ int main(int argc, char *argv[]) char *model_path = argv[2]; char *data_type = argv[3]; - AMSSetupAllocator(AMSResourceType::HOST); AMSResourceType resource = AMSResourceType::HOST; if (use_device == 1) { - AMSSetupAllocator(AMSResourceType::DEVICE); - AMSSetDefaultAllocator(AMSResourceType::DEVICE); resource = AMSResourceType::DEVICE; } if (std::strcmp("double", data_type) == 0) { std::shared_ptr> model = - SurrogateModel::getInstance(model_path, !use_device); + SurrogateModel::getInstance(model_path, resource); assert(model->is_double()); inference(*model, resource); } else if (std::strcmp("single", data_type) == 0) { std::shared_ptr> model = - SurrogateModel::getInstance(model_path, !use_device); + SurrogateModel::getInstance(model_path, resource); assert(!model->is_double()); inference(*model, resource); } From 715078a475b1ce639a43ef2fa0b53a511c302253 Mon Sep 17 00:00:00 2001 From: koparasy Date: Thu, 2 Nov 2023 13:39:04 -0700 Subject: [PATCH 2/7] Fixed allocators --- examples/main.cpp | 155 ++++++++++++++++++++++++++++++++---- src/AMS.cpp | 49 +++++------- src/include/AMS.h | 9 +-- src/wf/resource_manager.cpp | 76 +++++------------- src/wf/resource_manager.hpp | 148 +++++++++++++++------------------- tests/ams_allocate.cpp | 104 ++++++++++++------------ tests/cpu_packing_test.cpp | 8 +- tests/test_hdcache.cpp | 5 +- tests/torch_model.cpp | 4 +- 9 files changed, 316 insertions(+), 242 deletions(-) diff --git a/examples/main.cpp b/examples/main.cpp index f9f059fa..b5c17fdd 100644 --- a/examples/main.cpp +++ b/examples/main.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,94 @@ #include "AMS.h" +void printMemory(std::unordered_set &allocators) +{ + auto &rm = umpire::ResourceManager::getInstance(); + for (auto AN : allocators) { + auto alloc = rm.getAllocator(AN); + size_t wm = alloc.getHighWatermark(); + size_t cs = alloc.getCurrentSize(); + size_t as = alloc.getActualSize(); + std::cout << "Allocator '" << AN << "' High WaterMark:" << wm + << " Current Size:" << cs << " Actual Size:" << as << "\n"; + } +} + + +void createUmpirePool(std::string parent_name, std::string pool_name) +{ + std::cout << "Pool Name " << pool_name << "Parent Allocation " << parent_name + << "\n"; + auto &rm = umpire::ResourceManager::getInstance(); + auto alloc_resource = rm.makeAllocator( + pool_name, rm.getAllocator(parent_name)); +} + +std::unordered_set createMemoryAllocators( + std::string pool, + std::string &physics_host_alloc, + std::string &physics_device_alloc, + std::string &physics_pinned_alloc, + std::string &ams_host_alloc, + std::string &ams_device_alloc, + std::string &ams_pinned_alloc) +{ + std::unordered_set allocator_names; + if (pool == "default") { + physics_host_alloc = ams_host_alloc = "HOST"; + allocator_names.insert(ams_host_alloc); +#ifdef __ENABLE_CUDA__ + physics_device_alloc = ams_device_alloc = "DEVICE"; + allocator_names.insert(ams_device_alloc); + physics_pinned_alloc = ams_pinned_alloc = "PINNED"; + allocator_names.insert(ams_pinned_alloc); +#endif + } else if (pool == "split") { + physics_host_alloc = "phys-host"; + createUmpirePool("HOST", "phys-host"); + allocator_names.insert(physics_host_alloc); + + ams_host_alloc = "ams-host"; + createUmpirePool("HOST", ams_host_alloc); + 
allocator_names.insert(ams_host_alloc); + +#ifdef __ENABLE_CUDA__ + physics_device_alloc = "phys-device"; + createUmpirePool("DEVICE", physics_device_alloc); + allocator_names.insert(physics_device_alloc); + + physics_pinned_alloc = "phys-pinned"; + createUmpirePool("PINNED", physics_pinned_alloc); + allocator_names.insert(physics_pinned_alloc); + + ams_device_alloc = "ams-device"; + createUmpirePool("DEVICE", ams_device_alloc); + allocator_names.insert(ams_device_alloc); + + ams_pinned_alloc = "ams-pinned"; + createUmpirePool("PINNED", ams_pinned_alloc); + allocator_names.insert(ams_pinned_alloc); +#endif + } else if (pool == "same") { + physics_host_alloc = ams_host_alloc = "common-host"; + createUmpirePool("HOST", "common-host"); + allocator_names.insert(physics_host_alloc); +#ifdef __ENABLE_CUDA__ + physics_device_alloc = ams_device_alloc = "common-device"; + createUmpirePool("DEVICE", "common-device"); + allocator_names.insert(ams_device_alloc); + physics_pinned_alloc = ams_pinned_alloc = "common-pinned"; + createUmpirePool("PINNED", "common-pinned"); + allocator_names.insert(ams_pinned_alloc); +#endif + } else { + std::cout << "Stategy is " << pool << "\n"; + throw std::runtime_error("Pool strategy does not exist\n"); + } + return std::move(allocator_names); +} + + using TypeValue = double; using mfem::ForallWrap; @@ -96,6 +185,7 @@ int main(int argc, char **argv) TypeValue avg = 0.5; TypeValue stdDev = 0.2; bool reqDB = false; + const char *pool = "default"; #ifdef __ENABLE_DB__ reqDB = true; @@ -203,6 +293,14 @@ int main(int argc, char **argv) args.AddOption( &verbose, "-v", "--verbose", "-qu", "--quiet", "Print extra stuff"); + args.AddOption(&pool, + "-ptype", + "--pool-type", + "How to assign memory pools to AMSlib:\n" + "\t 'default' Use the default Umpire pool\n" + "\t 'split' provide a separate pool to AMSlib\n" + "\t 'same': assign the same with physics to AMS\n"); + // ------------------------------------------------------------------------- // parse arguments // ------------------------------------------------------------------------- @@ -308,28 +406,53 @@ int main(int argc, char **argv) // ------------------------------------------------------------------------- // setup data allocators // ------------------------------------------------------------------------- -// AMSSetupAllocator(AMSResourceType::HOST); -// if (use_device) { -// AMSSetupAllocator(AMSResourceType::DEVICE); -// AMSSetupAllocator(AMSResourceType::PINNED); -// AMSSetDefaultAllocator(AMSResourceType::DEVICE); -// } else { -// AMSSetDefaultAllocator(AMSResourceType::HOST); -// } + // AMSSetupAllocator(AMSResourceType::HOST); + // if (use_device) { + // AMSSetupAllocator(AMSResourceType::DEVICE); + // AMSSetupAllocator(AMSResourceType::PINNED); + // AMSSetDefaultAllocator(AMSResourceType::DEVICE); + // } else { + // AMSSetDefaultAllocator(AMSResourceType::HOST); + // } // ------------------------------------------------------------------------- // setup mfem memory manager // ------------------------------------------------------------------------- // hardcoded names! 
- const std::string &alloc_name_host( - AMSGetAllocatorName(AMSResourceType::HOST)); - const std::string &alloc_name_device( - AMSGetAllocatorName(AMSResourceType::DEVICE)); - mfem::MemoryManager::SetUmpireHostAllocatorName(alloc_name_host.c_str()); + std::string physics_host_alloc; + std::string physics_device_alloc; + std::string physics_pinned_alloc; + + std::string ams_host_alloc; + std::string ams_device_alloc; + std::string ams_pinned_alloc; + + auto allocator_names = createMemoryAllocators(std::string(pool), + physics_host_alloc, + physics_device_alloc, + physics_pinned_alloc, + ams_host_alloc, + ams_device_alloc, + ams_pinned_alloc); + + + mfem::MemoryManager::SetUmpireHostAllocatorName(physics_host_alloc.c_str()); if (use_device) { mfem::MemoryManager::SetUmpireDeviceAllocatorName( - alloc_name_device.c_str()); + physics_device_alloc.c_str()); + } + + + // When we are not allocating from parent/root umpire allocator + // we need to inform AMS about the pool allocators. + if (strcmp(pool, "default") != 0) { + AMSSetAllocator(AMSResourceType::HOST, ams_host_alloc.c_str()); + + if (use_device) { + AMSSetAllocator(AMSResourceType::DEVICE, ams_device_alloc.c_str()); + AMSSetAllocator(AMSResourceType::PINNED, ams_pinned_alloc.c_str()); + } } mfem::Device::SetMemoryTypes(mfem::MemoryType::HOST_UMPIRE, @@ -340,7 +463,6 @@ int main(int argc, char **argv) device.Print(); std::cout << std::endl; - //AMSResourceInfo(); // ------------------------------------------------------------------------- // setup indicators @@ -399,7 +521,7 @@ int main(int argc, char **argv) if (eos_name == std::string("ideal_gas")) { eoses[mat_idx] = new IdealGas(1.6, 1.4); } else if (eos_name == std::string("constant_host")) { - eoses[mat_idx] = new ConstantEOSOnHost(alloc_name_host.c_str(), 1.0); + eoses[mat_idx] = new ConstantEOSOnHost(physics_host_alloc.c_str(), 1.0); } else { std::cerr << "unknown eos `" << eos_name << "'" << std::endl; return 1; @@ -696,6 +818,7 @@ int main(int argc, char **argv) } CALIPER(CALI_MARK_END("Cycle");) MPI_CALL(MPI_Barrier(MPI_COMM_WORLD)); + printMemory(allocator_names); } #ifdef USE_AMS delete[] workflow; diff --git a/src/AMS.cpp b/src/AMS.cpp index 24e623fb..fd8f0e4f 100644 --- a/src/AMS.cpp +++ b/src/AMS.cpp @@ -5,20 +5,23 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ +#include "AMS.h" + #include -#include "AMS.h" +#include "wf/resource_manager.hpp" #include "wf/workflow.hpp" -struct AMSWrap{ +struct AMSWrap { std::vector> executors; - ~AMSWrap() { - for ( auto E : executors ){ - if ( E.second != nullptr ){ - if ( E.first == AMSDType::Double ){ - delete reinterpret_cast *> (E.second); - } else{ - delete reinterpret_cast *> (E.second); + ~AMSWrap() + { + for (auto E : executors) { + if (E.second != nullptr) { + if (E.first == AMSDType::Double) { + delete reinterpret_cast *>(E.second); + } else { + delete reinterpret_cast *>(E.second); } } } @@ -36,6 +39,9 @@ void _AMSExecute(AMSExecutor executor, int outputDim, MPI_Comm Comm = 0) { + static std::once_flag flag; + std::call_once(flag, [&]() { ams::ResourceManager::init(); }); + uint64_t index = reinterpret_cast(executor); if (index >= _amsWrap.executors.size()) @@ -89,7 +95,8 @@ AMSExecutor AMSCreateExecutor(const AMSConfig config) config.wSize, config.ePolicy); - _amsWrap.executors.push_back(std::make_pair(config.dType, static_cast(dWF))); + _amsWrap.executors.push_back( + std::make_pair(config.dType, static_cast(dWF))); return reinterpret_cast(_amsWrap.executors.size() - 1L); } else if (config.dType == 
AMSDType::Single) { ams::AMSWorkflow *sWF = @@ -105,7 +112,8 @@ AMSExecutor AMSCreateExecutor(const AMSConfig config) config.pId, config.wSize, config.ePolicy); - _amsWrap.executors.push_back(std::make_pair(config.dType, static_cast(sWF))); + _amsWrap.executors.push_back( + std::make_pair(config.dType, static_cast(sWF))); return reinterpret_cast(_amsWrap.executors.size() - 1L); } else { @@ -155,25 +163,12 @@ void AMSDistributedExecute(AMSExecutor executor, const char *AMSGetAllocatorName(AMSResourceType device) { - if (device == AMSResourceType::HOST) { - return ams::ResourceManager::getHostAllocatorName(); - } else if (device == AMSResourceType::DEVICE) { - return ams::ResourceManager::getDeviceAllocatorName(); - } - - throw std::runtime_error("requested Device Allocator does not exist"); - - return nullptr; -} - -void AMSSetupAllocator(const AMSResourceType Resource) -{ - ams::ResourceManager::setup(Resource); + return std::move(ams::ResourceManager::getAllocatorName(device)).c_str(); } -int AMSGetLocationId(void *ptr) +void AMSSetAllocator(AMSResourceType resource, const char *alloc_name) { - return ams::ResourceManager::getDataAllocationId(ptr); + ams::ResourceManager::setAllocator(std::string(alloc_name), resource); } #ifdef __cplusplus diff --git a/src/include/AMS.h b/src/include/AMS.h index 0f692151..794c4eef 100644 --- a/src/include/AMS.h +++ b/src/include/AMS.h @@ -58,9 +58,9 @@ typedef enum { UBALANCED = 0, BALANCED } AMSExecPolicy; typedef enum { None = 0, CSV, REDIS, HDF5, RMQ } AMSDBType; typedef enum { - FAISSMean =0, + FAISSMean = 0, FAISSMax, - DeltaUQ // Not supported + DeltaUQ // Not supported } AMSUQPolicy; typedef struct ams_conf { @@ -105,11 +105,8 @@ void AMSDestroyExecutor(AMSExecutor executor); int AMSSetCommunicator(MPI_Comm Comm); #endif +void AMSSetAllocator(AMSResourceType resource, const char *alloc_name); const char *AMSGetAllocatorName(AMSResourceType device); -void AMSSetupAllocator(const AMSResourceType device); -void AMSSetDefaultAllocator(const AMSResourceType device); -void AMSResourceInfo(); -int AMSGetLocationId(void *ptr); #ifdef __cplusplus } diff --git a/src/wf/resource_manager.cpp b/src/wf/resource_manager.cpp index 370dcea6..1c045f45 100644 --- a/src/wf/resource_manager.cpp +++ b/src/wf/resource_manager.cpp @@ -5,76 +5,44 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ +#include "resource_manager.hpp" + +#include #include #include -#include "resource_manager.hpp" +#include "debug.h" namespace ams { -//! 
-------------------------------------------------------------------------- -const char* ResourceManager::getDeviceAllocatorName() -{ - return "mmp-device-quickpool"; -} +std::string AMSAllocator::getName() { return allocator.getName(); } -const char* ResourceManager::getHostAllocatorName() +void *AMSAllocator::allocate(size_t num_bytes) { - return "mmp-host-quickpool"; + void *ptr = allocator.allocate(num_bytes); + CFATAL(ResourceManager, + ptr == nullptr, + "Failed to allocated %ld values using allocator %s", + num_bytes, + getName().c_str()); + return ptr; } -const char* ResourceManager::getPinnedAllocatorName() -{ - return "mmp-pinned-quickpool"; -} +void AMSAllocator::deallocate(void *ptr) { allocator.deallocate(ptr); } -const char* ResourceManager::getAllocatorName(AMSResourceType Resource) +void AMSAllocator::registerPtr(void *ptr, size_t nBytes) { - if (Resource == AMSResourceType::HOST) - return ResourceManager::getHostAllocatorName(); - else if (Resource == AMSResourceType::DEVICE) - return ResourceManager::getDeviceAllocatorName(); - else if (Resource == AMSResourceType::PINNED) - return ResourceManager::getPinnedAllocatorName(); - else { - FATAL(ResourceManager, "Request allocator for resource that does not exist (%d)", Resource) - return nullptr; - } + auto &rm = umpire::ResourceManager::getInstance(); + rm.registerAllocation(ptr, + umpire::util::AllocationRecord( + ptr, nBytes, allocator.getAllocationStrategy())); } -//! -------------------------------------------------------------------------- -// maintain a list of allocator ids -int ResourceManager::allocator_ids[AMSResourceType::RSEND] = {-1, -1, -1}; - -// maintain a list of allocator names -std::string ResourceManager::allocator_names[AMSResourceType::RSEND] = {"HOST", - "DEVICE", - "PINNED"}; - - +std::vector ResourceManager::RMAllocators = {nullptr, + nullptr, + nullptr}; // ----------------------------------------------------------------------------- // set up the resource manager // ----------------------------------------------------------------------------- -PERFFASPECT() -void ResourceManager::setup(const AMSResourceType Resource) -{ - if (Resource < AMSResourceType::HOST || Resource >= AMSResourceType::RSEND) { - throw std::runtime_error("Resource does not exist\n"); - } - - // use umpire resource manager - auto& rm = umpire::ResourceManager::getInstance(); - - auto alloc_name = ResourceManager::getAllocatorName(Resource); - auto alloc_resource = rm.makeAllocator( - alloc_name, rm.getAllocator(allocator_names[Resource])); - - DBG(ResourceManager, - "Setting up ams::ResourceManager::%s:%d", - allocator_names[Resource].c_str(), - Resource); - - allocator_ids[Resource] = alloc_resource.getId(); -} } // namespace ams diff --git a/src/wf/resource_manager.hpp b/src/wf/resource_manager.hpp index 2e53ad5a..cf242843 100644 --- a/src/wf/resource_manager.hpp +++ b/src/wf/resource_manager.hpp @@ -24,16 +24,39 @@ namespace ams * a unified interface to the umpire library for memory allocations * and data movements/copies. 
*/ + +struct AMSAllocator { + int id; + umpire::Allocator allocator; + + AMSAllocator(std::string alloc_name) + { + auto& rm = umpire::ResourceManager::getInstance(); + allocator = rm.getAllocator(alloc_name); + } + + void* allocate(size_t num_bytes); + void deallocate(void* ptr); + + void setAllocator(umpire::Allocator& alloc); + + std::string getName(); + + void registerPtr(void* ptr, size_t nBytes); + static void deregisterPtr(void* ptr) + { + auto& rm = umpire::ResourceManager::getInstance(); + rm.deregisterAllocation(ptr); + } +}; + class ResourceManager { public: private: /** @brief Used internally to map resource types (Device, host, pinned memory) to * umpire allocator ids. */ - static int allocator_ids[AMSResourceType::RSEND]; - - /** @brief The names of the user defined allocators */ - static std::string allocator_names[AMSResourceType::RSEND]; + static std::vector RMAllocators; public: ResourceManager() = delete; @@ -42,60 +65,10 @@ class ResourceManager ResourceManager& operator=(const ResourceManager&) = delete; ResourceManager& operator=(ResourceManager&&) = delete; - /** @brief The names of the user defined allocators */ - static const char* getDeviceAllocatorName(); - - /** @brief Get the name of the Host allocator */ - static const char* getHostAllocatorName(); - - /** @brief Get the name of the Pinned memory Allocator */ - static const char* getPinnedAllocatorName(); - - /** @brief Get the name of the Pinned memory Allocator */ - static const char* getAllocatorName(AMSResourceType Resource); - - /** @brief setup allocators in the resource manager */ - static void setup(const AMSResourceType resource); - - /** @brief Check if pointer is allocatd through - * @tparam TypeInValue The type of pointer being tested. - * @param[in] data pointer to memory. - * @return Boolean value describing whether the pointer has - * been allocated through an internal allocator - */ - template - static bool hasAllocator(const TypeInValue* data) + /** @brief return the name of an allocator */ + static std::string getAllocatorName(AMSResourceType resource) { - static auto& rm = umpire::ResourceManager::getInstance(); - void* vdata = static_cast(const_cast(data)); - return rm.hasAllocator(vdata); - } - - /** @brief Returns the id of the allocator allocated the defined memory. - * @tparam TypeInValue The type of pointer being tested. - * @param[in] data pointer to memory. - * @return Allocator Id. - */ - template - static int getDataAllocationId(const TypeInValue* data) - { - static auto& rm = umpire::ResourceManager::getInstance(); - void* vdata = static_cast(const_cast(data)); - return hasAllocator(vdata) ? rm.getAllocator(vdata).getId() : -1; - } - - /** @brief Returns the name of the allocator allocated the defined memory. - * @tparam TypeInValue The type of pointer being tested. - * @param[in] data pointer to memory. - * @return Allocator name if allocated through umpire else "unknown". - */ - template - static const std::string getDataAllocationName(const TypeInValue* data) - { - static auto& rm = umpire::ResourceManager::getInstance(); - void* vdata = static_cast(const_cast(data)); - return rm.hasAllocator(vdata) ? rm.getAllocator(vdata).getName() - : "unknown"; + return RMAllocators[resource]->getName(); } /** @brief Allocates nvalues on the specified device. 
@@ -108,20 +81,8 @@ class ResourceManager
   PERFFASPECT()
   static TypeInValue* allocate(size_t nvalues, AMSResourceType dev)
   {
-    static auto& rm = umpire::ResourceManager::getInstance();
-    DBG(ResourceManager,
-        "Requesting to allocate %ld values using allocator :%s",
-        nvalues,
-        getAllocatorName(dev));
-    auto alloc = rm.getAllocator(allocator_ids[dev]);
-    TypeInValue* ret = static_cast<TypeInValue*>(
-        alloc.allocate(nvalues * sizeof(TypeInValue)));
-    CFATAL(ResourceManager,
-           ret == nullptr,
-           "Failed to allocated %ld values on device %d",
-           nvalues,
-           dev);
-    return ret;
+    return static_cast<TypeInValue*>(
+        RMAllocators[dev]->allocate(nvalues * sizeof(TypeInValue)));
   }
 
   /** @brief deallocates pointer from the specified device.
@@ -134,10 +95,7 @@ class ResourceManager
   PERFFASPECT()
   static void deallocate(TypeInValue* data, AMSResourceType dev)
   {
-    static auto& rm = umpire::ResourceManager::getInstance();
-    if (hasAllocator(data)) {
-      rm.getAllocator(allocator_ids[dev]).deallocate(data);
-    }
+    RMAllocators[dev]->deallocate(data);
   }
 
   /** @brief registers an external pointer in the umpire allocation records.
@@ -149,11 +107,7 @@ class ResourceManager
   PERFFASPECT()
   static void registerExternal(void* ptr, size_t nBytes, AMSResourceType dev)
   {
-    auto& rm = umpire::ResourceManager::getInstance();
-    auto alloc = rm.getAllocator(allocator_ids[dev]);
-    rm.registerAllocation(ptr,
-                          umpire::util::AllocationRecord(
-                              ptr, nBytes, alloc.getAllocationStrategy()));
+    RMAllocators[dev]->registerPtr(ptr, nBytes);
   }
 
   /** @brief removes a registered external pointer from the umpire allocation records.
@@ -162,8 +116,7 @@ class ResourceManager
    */
   static void deregisterExternal(void* ptr)
   {
-    auto& rm = umpire::ResourceManager::getInstance();
-    rm.deregisterAllocation(ptr);
+    AMSAllocator::deregisterPtr(ptr);
   }
 
   /** @brief copy values from src to destination regardless of their memory location.
@@ -187,10 +140,37 @@ class ResourceManager
    * @return void.
    */
   template <typename TypeInValue>
-  static void deallocate(std::vector<TypeInValue*>& dPtr)
+  static void deallocate(std::vector<TypeInValue*>& dPtr, AMSResourceType resource)
   {
     for (auto* I : dPtr)
-      deallocate(I);
+      RMAllocators[resource]->deallocate(I);
   }
+
+  static void init()
+  {
+    DBG(ResourceManager, "Default initialization of allocators");
+    if (!RMAllocators[AMSResourceType::HOST])
+      setAllocator("HOST", AMSResourceType::HOST);
+#ifdef __ENABLE_CUDA__
+    if (!RMAllocators[AMSResourceType::DEVICE])
+      setAllocator("DEVICE", AMSResourceType::DEVICE);
+
+    if (!RMAllocators[AMSResourceType::PINNED])
+      setAllocator("PINNED", AMSResourceType::PINNED);
+#endif
+  }
+
+  static void setAllocator(std::string alloc_name, AMSResourceType resource)
+  {
+    if (RMAllocators[resource]) {
+      delete RMAllocators[resource];
+    }
+
+    RMAllocators[resource] = new AMSAllocator(alloc_name);
+    DBG(ResourceManager,
+        "Set Allocator [%d] to pool with name : %s",
+        resource,
+        RMAllocators[resource]->getName().c_str());
+  }
 
   /** @brief Returns the memory consumption of the given resource as measured from Umpire.
diff --git a/tests/ams_allocate.cpp b/tests/ams_allocate.cpp index 58c93185..d5ea1631 100644 --- a/tests/ams_allocate.cpp +++ b/tests/ams_allocate.cpp @@ -11,65 +11,71 @@ #include #include #include +#include #include -int main(int argc, char* argv[]) +int test_allocation(AMSResourceType resource, std::string pool_name) { + std::cout << "Testing Pool: " << pool_name << "\n"; auto& rm = umpire::ResourceManager::getInstance(); - int device = std::atoi(argv[1]); + double* data = ams::ResourceManager::allocate(1, resource); + auto found_allocator = rm.getAllocator(data); + if (ams::ResourceManager::getAllocatorName(resource) != + found_allocator.getName()) { + std::cout << "Allocator Name" + << ams::ResourceManager::getAllocatorName(resource) + << "Actual Allocation " << found_allocator.getName() << "\n"; + return 1; + } - if (device == 1) { - std::cout << "Starting allocation[Done]\n"; - double* data = - ams::ResourceManager::allocate(1, AMSResourceType::DEVICE); - auto found_allocator = rm.getAllocator(data); - if (strcmp(ams::ResourceManager::getDeviceAllocatorName(), - found_allocator.getName().data()) != 0) { - std::cout << "Device Allocator Name" - << ams::ResourceManager::getDeviceAllocatorName() - << "Actual Allocation " << found_allocator.getName() << "\n"; - return 1; - } - std::cout << "Explicit device allocation[Done]\n"; + if (ams::ResourceManager::getAllocatorName(resource) != pool_name) { + std::cout << "Allocator Name" + << ams::ResourceManager::getAllocatorName(resource) + << "is not equal to pool name " << pool_name << "\n"; + return 1; + } - found_allocator = rm.getAllocator(data); - if (strcmp(ams::ResourceManager::getDeviceAllocatorName(), - found_allocator.getName().data()) != 0) { - std::cout << "Device Allocator Name" - << ams::ResourceManager::getDeviceAllocatorName() - << "Actual Allocation " << found_allocator.getName() << "\n"; - return 3; - } + found_allocator = rm.getAllocator(data); + if (ams::ResourceManager::getAllocatorName(resource) != + found_allocator.getName().data()) { + std::cout << "Device Allocator Name" + << ams::ResourceManager::getAllocatorName(resource) + << "Actual Allocation " << found_allocator.getName() << "\n"; + return 3; + } - ams::ResourceManager::deallocate(data, AMSResourceType::DEVICE); - std::cout << "Explicit device de-allocation[Done]\n"; - } else if (device == 0) { - double* data = - ams::ResourceManager::allocate(1, AMSResourceType::HOST); - auto found_allocator = rm.getAllocator(data); - if (strcmp(ams::ResourceManager::getHostAllocatorName(), - found_allocator.getName().data()) != 0) { - std::cout << "Host Allocator Name" - << ams::ResourceManager::getHostAllocatorName() - << "Actual Allocation " << found_allocator.getName() << "\n"; - return 1; - } - std::cout << "Explicit device allocation[Done]\n"; + ams::ResourceManager::deallocate(data, resource); + return 0; +} + +int main(int argc, char* argv[]) +{ + int device = std::atoi(argv[1]); - ams::ResourceManager::deallocate(data, AMSResourceType::HOST); - std::cout << "Explicit device de-allocation[Done]\n"; + // Testing with global umpire allocators + ams::ResourceManager::init(); + if (device == 1) { + if (test_allocation(AMSResourceType::DEVICE, "DEVICE") != 0) return 1; + } else if (device == 0) { + if (test_allocation(AMSResourceType::HOST, "HOST") != 0) return 1; + } + // Testing with pools - found_allocator = rm.getAllocator(data); - if (strcmp(ams::ResourceManager::getHostAllocatorName(), - found_allocator.getName().data()) != 0) { - std::cout << "Host Allocator Name" - << 
ams::ResourceManager::getHostAllocatorName() - << "Actual Allocation " << found_allocator.getName() << "\n"; - return 3; - } - data = ams::ResourceManager::allocate(1, AMSResourceType::HOST); - std::cout << "Implicit device allocation [Done]\n"; + if (device == 1) { + auto& rm = umpire::ResourceManager::getInstance(); + auto alloc_resource = rm.makeAllocator( + "test-device", rm.getAllocator("DEVICE")); + ams::ResourceManager::setAllocator("test-device", AMSResourceType::DEVICE); + if (test_allocation(AMSResourceType::DEVICE, "test-device") != 0) return 1; + } else if (device == 0) { + auto& rm = umpire::ResourceManager::getInstance(); + auto alloc_resource = rm.makeAllocator( + "test-host", rm.getAllocator("HOST")); + ams::ResourceManager::setAllocator("test-host", AMSResourceType::HOST); + if (test_allocation(AMSResourceType::HOST, "test-host") != 0) return 1; } + + return 0; } diff --git a/tests/cpu_packing_test.cpp b/tests/cpu_packing_test.cpp index 8fb70181..508ed7a8 100644 --- a/tests/cpu_packing_test.cpp +++ b/tests/cpu_packing_test.cpp @@ -51,7 +51,7 @@ int main(int argc, char* argv[]) using data_handler = DataHandler; const size_t size = SIZE; int device = std::atoi(argv[1]); - AMSSetupAllocator(AMSResourceType::HOST); + ams::ResourceManager::init(); if (device == 0) { AMSResourceType resource = AMSResourceType::HOST; bool* predicate = ams::ResourceManager::allocate(SIZE, resource); @@ -66,7 +66,8 @@ int main(int argc, char* argv[]) int elements; for (int flag = 0; flag < 2; flag++) { - elements = data_handler::pack(resource, predicate, size, s_data, d_data, flag); + elements = + data_handler::pack(resource, predicate, size, s_data, d_data, flag); if (elements != (SIZE + flag) / 2) { std::cout << "Did not compute dense number correctly " << elements @@ -119,7 +120,8 @@ int main(int argc, char* argv[]) for (int flag = 0; flag < 2; flag++) { int elements; - elements = data_handler::pack(resource, predicate, size, s_data, d_data, flag); + elements = + data_handler::pack(resource, predicate, size, s_data, d_data, flag); if (elements != (SIZE + flag) / 2) { std::cout << "Did not compute dense number correctly(" << elements diff --git a/tests/test_hdcache.cpp b/tests/test_hdcache.cpp index 7ad9ece5..2416717b 100644 --- a/tests/test_hdcache.cpp +++ b/tests/test_hdcache.cpp @@ -154,8 +154,9 @@ int main(int argc, char *argv[]) int nElements = std::atoi(argv[8]); AMSResourceType resource = AMSResourceType::HOST; - if (use_device == 1) - resource = AMSResourceType::DEVICE; + if (use_device == 1) resource = AMSResourceType::DEVICE; + + ams::ResourceManager::init(); if (std::strcmp("double", data_type) == 0) { std::shared_ptr> cache = HDCache::getInstance( diff --git a/tests/torch_model.cpp b/tests/torch_model.cpp index 55ce7482..736049e5 100644 --- a/tests/torch_model.cpp +++ b/tests/torch_model.cpp @@ -36,7 +36,7 @@ void inference(SurrogateModel &model, AMSResourceType resource) for (int i = 0; i < 2; i++) - ResourceManager::deallocate(const_cast(inputs[i]), resource); + ResourceManager::deallocate(const_cast(inputs[i]), resource); for (int i = 0; i < 4; i++) ResourceManager::deallocate(outputs[i], resource); @@ -55,6 +55,8 @@ int main(int argc, char *argv[]) resource = AMSResourceType::DEVICE; } + ams::ResourceManager::init(); + if (std::strcmp("double", data_type) == 0) { std::shared_ptr> model = SurrogateModel::getInstance(model_path, resource); From 126db88cba7d08e4d38de2b0a36140d0e7bf913b Mon Sep 17 00:00:00 2001 From: koparasy Date: Thu, 2 Nov 2023 13:56:02 -0700 Subject: [PATCH 3/7] 
Fix Device issues --- src/ml/hdcache.hpp | 2 +- src/wf/cuda/utilities.cuh | 2 +- src/wf/resource_manager.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ml/hdcache.hpp b/src/ml/hdcache.hpp index a3d84c06..87a467a1 100644 --- a/src/ml/hdcache.hpp +++ b/src/ml/hdcache.hpp @@ -123,7 +123,7 @@ class HDCache { #ifdef __ENABLE_CUDA__ // Copy index to device side - if (cache_location == AMSRAMSResourceType::DEVICE) { + if (cache_location == AMSResourceType::DEVICE) { faiss::gpu::GpuClonerOptions copyOptions; faiss::gpu::ToGpuCloner cloner(&res, 0, copyOptions); m_index = cloner.clone_Index(m_index); diff --git a/src/wf/cuda/utilities.cuh b/src/wf/cuda/utilities.cuh index 7f10f26c..d8e64577 100644 --- a/src/wf/cuda/utilities.cuh +++ b/src/wf/cuda/utilities.cuh @@ -428,7 +428,7 @@ void cuda_rand_init(bool* predicate, const size_t length, T threshold) const int BS = 128; int numBlocks = divup(TS, BS); if (!dev_random) { - dev_random = ams::ResourceManager::allocate(4096); + dev_random = ams::ResourceManager::allocate(4096, AMSResourceType::DEVICE); srand_dev<<>>(dev_random, TS); } diff --git a/src/wf/resource_manager.hpp b/src/wf/resource_manager.hpp index cf242843..6d58db0a 100644 --- a/src/wf/resource_manager.hpp +++ b/src/wf/resource_manager.hpp @@ -153,7 +153,7 @@ class ResourceManager setAllocator("HOST", AMSResourceType::HOST); #ifdef __ENABLE_CUDA__ if (!RMAllocators[AMSResourceType::DEVICE]) - setAllocator("DEVICE", AMSResourceType::HOST); + setAllocator("DEVICE", AMSResourceType::DEVICE); if (!RMAllocators[AMSResourceType::PINNED]) setAllocator("PINNED", AMSResourceType::PINNED); From 357260cea049c8bf2aef656779632d94073ab34b Mon Sep 17 00:00:00 2001 From: koparasy Date: Thu, 2 Nov 2023 17:17:05 -0700 Subject: [PATCH 4/7] Fix device faiss initialization and destruction --- src/AMS.cpp | 4 ++-- src/ml/hdcache.hpp | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/AMS.cpp b/src/AMS.cpp index fd8f0e4f..8b8308cf 100644 --- a/src/AMS.cpp +++ b/src/AMS.cpp @@ -87,7 +87,7 @@ AMSExecutor AMSCreateExecutor(const AMSConfig config) config.SPath, config.DBPath, config.dbType, - AMSResourceType::HOST, + config.device, config.threshold, config.uqPolicy, config.nClusters, @@ -105,7 +105,7 @@ AMSExecutor AMSCreateExecutor(const AMSConfig config) config.SPath, config.DBPath, config.dbType, - AMSResourceType::HOST, + config.device, static_cast(config.threshold), config.uqPolicy, config.nClusters, diff --git a/src/ml/hdcache.hpp b/src/ml/hdcache.hpp index 87a467a1..20a22b5c 100644 --- a/src/ml/hdcache.hpp +++ b/src/ml/hdcache.hpp @@ -249,8 +249,12 @@ class HDCache #ifdef __ENABLE_FAISS__ if (m_index) { DBG(UQModule, "Deleting HD-Cache"); - m_index->reset(); - delete m_index; + /// TODO: Deleting the cache on device can, and does + /// result in C++ destructor. 
+ if ( cache_location != AMSResourceType::DEVICE){ + m_index->reset(); + delete m_index; + } } #endif } From 71e66ecb5075782de73e513e6b3bfe3c302a9ea8 Mon Sep 17 00:00:00 2001 From: koparasy Date: Fri, 3 Nov 2023 12:42:24 -0700 Subject: [PATCH 5/7] Address comment --- examples/main.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/examples/main.cpp b/examples/main.cpp index b5c17fdd..0f710a0f 100644 --- a/examples/main.cpp +++ b/examples/main.cpp @@ -403,18 +403,6 @@ int main(int argc, char **argv) std::cerr << "Rank:" << rId << " Threshold " << threshold << "\n"; - // ------------------------------------------------------------------------- - // setup data allocators - // ------------------------------------------------------------------------- - // AMSSetupAllocator(AMSResourceType::HOST); - // if (use_device) { - // AMSSetupAllocator(AMSResourceType::DEVICE); - // AMSSetupAllocator(AMSResourceType::PINNED); - // AMSSetDefaultAllocator(AMSResourceType::DEVICE); - // } else { - // AMSSetDefaultAllocator(AMSResourceType::HOST); - // } - // ------------------------------------------------------------------------- // setup mfem memory manager // ------------------------------------------------------------------------- From 10dc7d551f26b2a9984e29e1b44d9cd5b3c462b1 Mon Sep 17 00:00:00 2001 From: koparasy Date: Mon, 6 Nov 2023 07:37:35 -0800 Subject: [PATCH 6/7] Fix memory stats issue --- src/wf/resource_manager.cpp | 8 ++++++++ src/wf/resource_manager.hpp | 8 +++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/wf/resource_manager.cpp b/src/wf/resource_manager.cpp index 1c045f45..2d35573f 100644 --- a/src/wf/resource_manager.cpp +++ b/src/wf/resource_manager.cpp @@ -39,6 +39,14 @@ void AMSAllocator::registerPtr(void *ptr, size_t nBytes) ptr, nBytes, allocator.getAllocationStrategy())); } +void AMSAllocator::getAllocatorStats(size_t &wm, size_t &cs, size_t &as) +{ + wm = allocator.getHighWatermark(); + cs = allocator.getCurrentSize(); + as = allocator.getActualSize(); +} + + std::vector ResourceManager::RMAllocators = {nullptr, nullptr, nullptr}; diff --git a/src/wf/resource_manager.hpp b/src/wf/resource_manager.hpp index 6d58db0a..a16eadd6 100644 --- a/src/wf/resource_manager.hpp +++ b/src/wf/resource_manager.hpp @@ -48,6 +48,8 @@ struct AMSAllocator { auto& rm = umpire::ResourceManager::getInstance(); rm.deregisterAllocation(ptr); } + + void getAllocatorStats(size_t& wm, size_t& cs, size_t& as); }; class ResourceManager @@ -185,11 +187,7 @@ class ResourceManager size_t& cs, size_t& as) { - auto& rm = umpire::ResourceManager::getInstance(); - auto alloc = rm.getAllocator(allocator_ids[resource]); - wm = alloc.getHighWatermark(); - cs = alloc.getCurrentSize(); - as = alloc.getActualSize(); + RMAllocators[resource]->getAllocatorStats(wm, cs, as); return; } //! 
------------------------------------------------------------------------ From af7001abc51e4b2855c341b9686b379d47d10c11 Mon Sep 17 00:00:00 2001 From: koparasy Date: Mon, 6 Nov 2023 07:51:16 -0800 Subject: [PATCH 7/7] Fix hdcache order --- src/ml/hdcache.hpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/ml/hdcache.hpp b/src/ml/hdcache.hpp index 20a22b5c..00e67f2c 100644 --- a/src/ml/hdcache.hpp +++ b/src/ml/hdcache.hpp @@ -133,8 +133,7 @@ class HDCache } #else // Disabled FAISS HDCache(const std::string &cache_path, - int knbrs, - AMSRAMSResourceType resource, + AMSResourceType resource, const AMSUQPolicy uqPolicy, int knbrs, TypeInValue threshold = 0.5) @@ -251,7 +250,7 @@ class HDCache DBG(UQModule, "Deleting HD-Cache"); /// TODO: Deleting the cache on device can, and does /// result in C++ destructor. - if ( cache_location != AMSResourceType::DEVICE){ + if (cache_location != AMSResourceType::DEVICE) { m_index->reset(); delete m_index; } @@ -347,7 +346,8 @@ class HDCache !has_index(), "HDCache does not have a valid and trained index!") - TypeValue *lin_data = data_handler::linearize_features(cache_location, ndata, inputs); + TypeValue *lin_data = + data_handler::linearize_features(cache_location, ndata, inputs); _add(ndata, lin_data); ams::ResourceManager::deallocate(lin_data, cache_location); } @@ -375,7 +375,8 @@ class HDCache void train(const size_t ndata, const std::vector &inputs) { if (m_use_random) return; - TypeValue *lin_data = data_handler::linearize_features(cache_location, ndata, inputs); + TypeValue *lin_data = + data_handler::linearize_features(cache_location, ndata, inputs); _train(ndata, lin_data); ams::ResourceManager::deallocate(lin_data, cache_location); } @@ -437,7 +438,8 @@ class HDCache if (m_use_random) { _evaluate(ndata, is_acceptable); } else { - TypeValue *lin_data = data_handler::linearize_features(cache_location, ndata, inputs); + TypeValue *lin_data = + data_handler::linearize_features(cache_location, ndata, inputs); _evaluate(ndata, lin_data, is_acceptable); ams::ResourceManager::deallocate(lin_data, cache_location); } @@ -467,7 +469,8 @@ class HDCache PERFFASPECT() inline void _add(const size_t ndata, const T *data) { - TypeValue *vdata = data_handler::cast_to_typevalue(cache_location, ndata, data); + TypeValue *vdata = + data_handler::cast_to_typevalue(cache_location, ndata, data); _add(ndata, vdata); delete[] vdata; } @@ -499,7 +502,8 @@ class HDCache PERFFASPECT() inline void _train(const size_t ndata, const T *data) { - TypeValue *vdata = data_handler::cast_to_typevalue(cache_location, ndata, data); + TypeValue *vdata = + data_handler::cast_to_typevalue(cache_location, ndata, data); _train(ndata, vdata); delete[] vdata; } @@ -516,9 +520,11 @@ class HDCache static const TypeValue ook = 1.0 / TypeValue(knbrs); TypeValue *kdists = - ams::ResourceManager::allocate(ndata * knbrs, cache_location); + ams::ResourceManager::allocate(ndata * knbrs, + cache_location); TypeIndex *kidxs = - ams::ResourceManager::allocate(ndata * knbrs, cache_location); + ams::ResourceManager::allocate(ndata * knbrs, + cache_location); // query faiss // TODO: This is a HACK. 
When searching more than 65535 @@ -539,7 +545,7 @@ class HDCache #endif // compute means - if (cache_location== AMSResourceType::HOST) { + if (cache_location == AMSResourceType::HOST) { for (size_t i = 0; i < ndata; ++i) { CFATAL(UQModule, m_policy == AMSUQPolicy::DeltaUQ, @@ -577,7 +583,8 @@ class HDCache std::enable_if_t::value> * = nullptr> inline void _evaluate(const size_t ndata, T *data, bool *is_acceptable) const { - TypeValue *vdata = data_handler::cast_to_typevalue(cache_location, ndata, data); + TypeValue *vdata = + data_handler::cast_to_typevalue(cache_location, ndata, data); _evaluate(ndata, data, is_acceptable); delete[] vdata; } @@ -609,7 +616,7 @@ class HDCache PERFFASPECT() inline void _evaluate(const size_t ndata, bool *is_acceptable) const { - if ( cache_location == AMSResourceType::HOST ) { + if (cache_location == AMSResourceType::HOST) { #ifdef __ENABLE_CUDA__ random_uq_Device<<<1, 1>>>(is_acceptable, ndata, acceptable_error); #endif
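
Usage note (not part of the patches above): after this series there is no process-wide default allocator, so every allocation, deallocation, and external registration names its AMSResourceType explicitly. Below is a minimal host-side sketch of driving the refactored ResourceManager; the include path and the explicit template argument on allocate are assumptions, since neither is fully visible in this excerpt, while the call names and parameters match resource_manager.hpp as patched.

    #include <iostream>
    #include <vector>

    #include "wf/resource_manager.hpp"  // AMS-internal header; path assumed

    int run_host_example()
    {
      // Build the default HOST (and, under CUDA, DEVICE/PINNED) allocators.
      ams::ResourceManager::init();

      const AMSResourceType resource = AMSResourceType::HOST;

      // allocate/deallocate stay templated on the element type; <double> is assumed.
      double* data = ams::ResourceManager::allocate<double>(1024, resource);

      std::cout << "Allocated through: "
                << ams::ResourceManager::getAllocatorName(resource) << "\n";

      // Externally owned memory can still be tracked in Umpire's records.
      std::vector<double> external(256);
      ams::ResourceManager::registerExternal(external.data(),
                                             external.size() * sizeof(double),
                                             resource);
      ams::ResourceManager::deregisterExternal(external.data());

      // Deallocation also names the resource; there is no implicit default anymore.
      ams::ResourceManager::deallocate(data, resource);
      return 0;
    }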
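
The new ResourceManager::setAllocator lets an application route a resource through its own Umpire pool, which is the pattern tests/ams_allocate.cpp exercises with its "test-host"/"test-device" allocators. A sketch of the same idea follows; the pooling strategy (umpire::strategy::QuickPool) is an assumption, because the test's makeAllocator template arguments are not preserved in this excerpt.

    #include "umpire/ResourceManager.hpp"
    #include "umpire/strategy/QuickPool.hpp"

    #include "wf/resource_manager.hpp"  // AMS-internal header; path assumed

    void use_pooled_host_allocator()
    {
      auto& rm = umpire::ResourceManager::getInstance();

      // Wrap the built-in HOST allocator in a named pool.
      rm.makeAllocator<umpire::strategy::QuickPool>("test-host",
                                                    rm.getAllocator("HOST"));

      // Every AMSResourceType::HOST allocation now goes through that pool.
      ams::ResourceManager::setAllocator("test-host", AMSResourceType::HOST);

      double* data =
          ams::ResourceManager::allocate<double>(128, AMSResourceType::HOST);
      ams::ResourceManager::deallocate(data, AMSResourceType::HOST);
    }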
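
Patch 6 reroutes the memory statistics through AMSAllocator::getAllocatorStats instead of looking up Umpire allocator ids directly. A small sketch of querying them; the leading AMSResourceType parameter and the (high watermark, current size, actual size) ordering are inferred from the hunk body rather than spelled out in this excerpt.

    #include <cstdio>

    #include "wf/resource_manager.hpp"  // AMS-internal header; path assumed

    void print_host_memory_stats()
    {
      size_t wm = 0, cs = 0, as = 0;
      // Forwards to RMAllocators[HOST]->getAllocatorStats(wm, cs, as).
      ams::ResourceManager::getAllocatorStats(AMSResourceType::HOST, wm, cs, as);
      std::printf("HOST watermark=%zu current=%zu actual=%zu\n", wm, cs, as);
    }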