From 6be01852aa16ca9f9c5f2fe5ed97065f0b1993ee Mon Sep 17 00:00:00 2001 From: shosseinimotlagh Date: Thu, 4 Jan 2024 10:00:11 -0800 Subject: [PATCH 1/3] change number 1 which didn't work --- src/engine/blkalloc/blk.h | 2 +- src/homeblks/homeblks_config.fbs | 3 + src/homeblks/volume/tests/vol_gtest.cpp | 71 +++++++++- src/homeblks/volume/volume.cpp | 177 ++++++++++++++++++++++-- src/homeblks/volume/volume.hpp | 8 +- 5 files changed, 246 insertions(+), 15 deletions(-) diff --git a/src/engine/blkalloc/blk.h b/src/engine/blkalloc/blk.h index 5429f77bf..40262f154 100644 --- a/src/engine/blkalloc/blk.h +++ b/src/engine/blkalloc/blk.h @@ -102,7 +102,7 @@ struct BlkId { bool operator==(const BlkId& other) noexcept { return (compare(*this, other) == 0); } void invalidate() { set(blk_num_t{0}, blk_count_t{0}, s_chunk_num_mask); } - + // return invalid_blk_id() { return blk_count_t{0}; } [[nodiscard]] bool is_valid() const { return (m_chunk_num != s_chunk_num_mask); } [[nodiscard]] BlkId get_blkid_at(const uint32_t offset, const uint32_t pagesz) const { diff --git a/src/homeblks/homeblks_config.fbs b/src/homeblks/homeblks_config.fbs index 42566a9c4..a99da66d2 100644 --- a/src/homeblks/homeblks_config.fbs +++ b/src/homeblks/homeblks_config.fbs @@ -34,6 +34,9 @@ table GeneralConfig { // These fields should only be changed by agent through workflow boot_restricted_mode: bool = false; boot_safe_mode: bool = false; + + // This field is for enabling thin provisioing on booting + boot_thin_provisioning: bool = true; } table HomeBlksSettings { diff --git a/src/homeblks/volume/tests/vol_gtest.cpp b/src/homeblks/volume/tests/vol_gtest.cpp index d8aa4ef17..932b2fa7f 100644 --- a/src/homeblks/volume/tests/vol_gtest.cpp +++ b/src/homeblks/volume/tests/vol_gtest.cpp @@ -1675,7 +1675,8 @@ class IOTestJob : public TestJob { // lba: [0, max_vol_blks - max_blks) std::uniform_int_distribution< uint64_t > lba_random{0, vinfo->max_vol_blks - max_blks - 1}; // nlbas: [1, max_blks] - std::uniform_int_distribution< uint32_t > nlbas_random{1, max_blks}; +// std::uniform_int_distribution< uint32_t > nlbas_random{1, max_blks}; + std::uniform_int_distribution< uint32_t > nlbas_random{1, 5}; // we won't be writing more then 128 blocks in one io uint32_t attempt{1}; @@ -1816,16 +1817,21 @@ class IOTestJob : public TestJob { const uint64_t page_size{VolInterface::get_instance()->get_page_size(vol)}; const uint64_t size{nlbas * page_size}; boost::intrusive_ptr< io_req_t > vreq{}; + + static thread_local std::random_device rd{}; + static thread_local std::default_random_engine engine{rd()}; + static thread_local std::uniform_int_distribution< uint8_t > dist{0, 1}; + if (tcfg.write_cache) { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; HS_REL_ASSERT_NOTNULL(wbuf); populate_buf(wbuf, size, lba, vinfo.get()); - + populate_zero_buf(wbuf, size, vinfo.get()); vreq = boost::intrusive_ptr< io_req_t >( new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); } else { - static bool send_iovec{true}; + static bool send_iovec{false}; std::vector< iovec > iovecs{}; if (send_iovec) { for (uint32_t lba_num{0}; lba_num < nlbas; ++lba_num) { @@ -1833,8 +1839,8 @@ class IOTestJob : public TestJob { HS_REL_ASSERT_NOTNULL(wbuf); iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)}; iovecs.emplace_back(std::move(iov)); - populate_buf(wbuf, page_size, lba + lba_num, vinfo.get()); + populate_zero_buf(wbuf, size, vinfo.get()); } vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba, @@ -1842,12 +1848,13 @@ class IOTestJob : public TestJob { } else { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; populate_buf(wbuf, size, lba, vinfo.get()); + populate_zero_buf(wbuf, size, vinfo.get()); HS_REL_ASSERT_NOTNULL(wbuf); vreq = boost::intrusive_ptr< io_req_t >{ new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)}; } - send_iovec = !send_iovec; + // send_iovec = !send_iovec; } vreq->cookie = static_cast< void* >(this); @@ -1862,6 +1869,40 @@ class IOTestJob : public TestJob { return true; } + void populate_zero_buf(uint8_t* buf, const uint64_t size, const vol_info_t* const vinfo) { + auto page_size = VolInterface::get_instance()->get_page_size(vinfo->vol); + auto nlbas = size / page_size; + static thread_local std::random_device rd{}; + static thread_local std::default_random_engine engine{rd()}; + static thread_local std::uniform_int_distribution< uint8_t > dist{0, 100}; +// std::fill_n(buf + nlbas/2 * page_size, page_size, 0); +// { +// // first zero +// std::fill_n(buf, page_size, 0); +// } + { + // first x lbas the non_zero the rest zero + + if (nlbas >= 2) + std::fill_n(buf + page_size, (nlbas -1) *page_size, 0); + } +// { +// // randomly 5% of lbas can be zero +// for (long unsigned int i = 0; i < nlbas; ++i) { +// if (dist(engine) < 5) { std::fill_n(buf + i * page_size, page_size, 0); } +// } +// } +// { +// // one lba in the middle can be zero (two sub non empty ranges) +// std::uniform_int_distribution< uint8_t > ran_lba{1, nlbas-1}; +// auto l1= ran_lba(engine); +// auto l2= ran_lba(engine); +// auto lb1 = std::min(l1,l2); +// auto lb2 = std::max(l1,l2); +// std::fill_n(buf + l1 * page_size, (lb2 -lb1 +1) *page_size, 0); +// } + } + void populate_buf(uint8_t* const buf, const uint64_t size, const uint64_t lba, const vol_info_t* const vinfo) { static thread_local std::random_device rd{}; static thread_local std::default_random_engine engine{rd()}; @@ -1881,6 +1922,7 @@ class IOTestJob : public TestJob { } bool read_vol(const uint32_t cur, const uint64_t lba, const uint32_t nlbas) { + return true; const auto vinfo{m_voltest->m_vol_info[cur]}; const auto vol{vinfo->vol}; if (vol == nullptr) { return false; } @@ -1958,6 +2000,8 @@ class IOTestJob : public TestJob { } bool verify(const boost::intrusive_ptr< io_req_t >& req, const bool can_panic = true) const { + return true; +#if 0 const auto& vol_req{static_cast< vol_interface_req_ptr >(req)}; const auto verify_buffer{[this, &req, &can_panic](const uint8_t* const validate_buffer, @@ -2063,7 +2107,9 @@ class IOTestJob : public TestJob { tcfg.verify_csum() ? (HS_REL_ASSERT_EQ(total_size_read_csum, req->verify_size)) : (HS_REL_ASSERT_EQ(total_size_read, req->original_size)); return true; +#endif } + }; class VolVerifyJob : public IOTestJob { @@ -2224,6 +2270,21 @@ TEST_F(VolTest, init_io_test) { if (tcfg.remove_file_on_shutdown) { this->remove_files(); } } +TEST_F(VolTest, thin_test) { + this->start_homestore(); + std::unique_ptr< VolCreateDeleteJob > cdjob; + if (tcfg.create_del_with_io || tcfg.delete_with_io) { + cdjob = std::make_unique< VolCreateDeleteJob >(this); + this->start_job(cdjob.get(), wait_type::no_wait); + } + + this->start_io_job(); + output.print("init_io_test"); + + if (tcfg.create_del_with_io || tcfg.delete_with_io) { cdjob->wait_for_completion(); } + this->shutdown(); +} + /*! @test recovery_io_test @brief Tests which does recovery. End up with a clean shutdown diff --git a/src/homeblks/volume/volume.cpp b/src/homeblks/volume/volume.cpp index 77ff6fee7..5220c455f 100644 --- a/src/homeblks/volume/volume.cpp +++ b/src/homeblks/volume/volume.cpp @@ -171,7 +171,7 @@ Volume::Volume(const vol_params& params) : throw std::runtime_error("shutdown in progress"); } m_sobject = m_hb->sobject_mgr()->create_object("volume", params.vol_name, - std::bind(&Volume::get_status, this, std::placeholders::_1)); + std::bind(&Volume::get_status, this, std::placeholders::_1)); m_state = vol_state::UNINITED; } @@ -190,7 +190,7 @@ Volume::Volume(meta_blk* mblk_cookie, sisl::byte_view sb_buf) : HS_REL_ASSERT_EQ(sb->magic, vol_sb_magic, "magic mismatch"); m_hb = HomeBlks::safe_instance(); m_sobject = m_hb->sobject_mgr()->create_object("volume", sb->vol_name, - std::bind(&Volume::get_status, this, std::placeholders::_1)); + std::bind(&Volume::get_status, this, std::placeholders::_1)); } void Volume::init() { @@ -334,8 +334,149 @@ indx_tbl* Volume::recover_indx_tbl(btree_super_block& sb, btree_cp_sb& cp_info) SnapMgr::add_read_tracker, &cp_info); return static_cast< indx_tbl* >(tbl); } +static std::vector< bool > find_non_zero_data(const uint8_t* buf, size_t size, uint32_t nlbas) { + std::vector< bool > empty_blocks; + + auto is_buf_empty = [](const uint8_t* buf, size_t size) -> bool { + return buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1); + }; + for (uint32_t count{0}; count < nlbas; ++count) { + empty_blocks.push_back(!is_buf_empty(buf, size)); + buf += size; + } + return empty_blocks; +} +static std::vector< std::pair< int, int > > get_true_intervals(const std::vector< bool >& empty_blocks) { + std::vector< std::pair< int, int > > result; + + int start = -1; + for (std::size_t i = 0; i < empty_blocks.size(); ++i) { + if (empty_blocks[i]) { + if (start == -1) { start = i; } + } else { + if (start != -1) { + result.emplace_back(start, i - start); + start = -1; + } + } + } + + if (start != -1) { result.emplace_back(start, empty_blocks.size() - start); } + + return result; +} + +static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_t* buf, size_t page_size, + uint32_t nlbas, bool empty_blocks = false) { + std::vector< std::pair< int, int > > intervals; + bool in_empty_region = false; + int current_range_start = -1; + int current_range_length = 1; + auto is_buf_empty = [](const uint8_t* buf, size_t size) -> bool { + return buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1); + }; + for (uint32_t i = 0; i < nlbas; i++) { + const uint8_t* page_start = buf + (i * page_size); + bool is_page_empty = (empty_blocks == is_buf_empty(page_start, page_size)); + if (is_page_empty) { + if (!in_empty_region) { + current_range_start = i; + current_range_length = 1; + in_empty_region = true; + } else { + current_range_length++; + } + } else { + if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); } + in_empty_region = false; + } + } + if (in_empty_region) { intervals.push_back(std::make_pair(current_range_start, current_range_length)); } + return intervals; +} std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { + if (!HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ + return write_internal(iface_req); + } + std::error_condition ret{no_error}; + auto buf = static_cast< uint8_t* >(iface_req->buffer); + auto nlbas = iface_req->nlbas; + auto start_lba = iface_req->lba; + auto non_empty_blocks = compute_range_intervals(buf, get_page_size(), nlbas, false); +// auto vreq = volume_req::make(iface_req); + auto intervals_to_string = [start_lba](const std::vector< std::pair< int, int > >& intervals) -> std::string { + std::vector< std::string > result_strings; + std::transform(intervals.begin(), intervals.end(), std::back_inserter(result_strings), + [start_lba](const std::pair< int, int >& p) -> std::string { + // Use a static buffer to hold the formatted string + static char buffer[32]; + std::snprintf(buffer, sizeof(buffer), "<%ld,%d>", p.first + start_lba, p.second); + return buffer; + }); + return std::accumulate(result_strings.begin(), result_strings.end(), std::string("")); + }; + LOGINFO("original req <{}, {}> => [{}]", iface_req->lba, iface_req->nlbas, intervals_to_string(non_empty_blocks)); + for (const auto &interval : non_empty_blocks) { +//#if 0 + iface_req->lba = start_lba + interval.first; + iface_req->nlbas = interval.second; + iface_req->buffer = buf + (interval.first * get_page_size()); + iface_req->iovecs.clear(); + + + ret = write_internal(iface_req); + if (ret != no_error) { + return ret; + } +//#endif +#if 0 + auto lba = start_lba + interval.first; + auto nlbas = interval.second; + const auto buffer = buf + (interval.first * get_page_size()); + auto req = std::make_unique(buffer, lba, nlbas, iface_req->sync, iface_req->cache); + + req->vol_instance = shared_from_this(); + req->part_of_batch = iface_req->part_of_batch; + req->op_type = Op_type::WRITE; + LOGINFO("sending request to write_internal with lba: {}, nlbas: {} buffer :{}", req->lba, req->nlbas, req->buffer); + //extra + req->read_buf_list = iface_req->read_buf_list; + req->err = iface_req->err; + req->request_id = iface_req->request_id; + req->cache = iface_req->cache; + req->sync = iface_req->sync; + req->is_fail_completed = iface_req->is_fail_completed.load(); + req->cookie = iface_req->cookie; + + ret = write_internal(req.get()); + for (auto x: iface_req->read_buf_list) { + req->read_buf_list.push_back(x); + + } + for (auto p: iface_req->iovecs) { + req->iovecs.push_back(p); + + } + // vol_interface_req i_req(buffer, start_lba, nlbas, iface_req->sync, iface_req->cache); +// i_req.request_id = iface_req->request_id; +// auto ret = write_internal(&i_req); +// if (ret != no_error) { +// return ret; +// } +#endif + } + iface_req->buffer = (void*)(buf); + iface_req->nlbas = nlbas; + iface_req->lba = start_lba; +// check_and_complete_req(vreq, ret); +// interface_req_done(iface_req); + return ret; +} +//std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { +// +//} + std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) { static thread_local std::vector< BlkId > bid{}; std::error_condition ret{no_error}; @@ -344,6 +485,9 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { auto vreq = volume_req::make(iface_req); THIS_VOL_LOG(TRACE, volume, vreq, "write: lba={}, nlbas={}, cache={}", vreq->lba(), vreq->nlbas(), vreq->use_cache()); + LOGINFO("\nwrite: lba={}, nlbas={}, cache={} buffer= {}", vreq->lba(), vreq->nlbas(), + vreq->use_cache(), iface_req->buffer); + print_tree(); COUNTER_INCREMENT(m_metrics, volume_outstanding_data_write_count, 1); // Sanity checks @@ -371,6 +515,7 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { uint64_t start_lba{vreq->lba()}; for (size_t i{0}; i < bid.size(); ++i) { + LOGINFO("bid[{}]: {}", i, bid[i].to_string()); if (bid[i].get_nblks() == 0) { // It should not happen. But it happened once so adding a safe check in case it happens again VOL_LOG_ASSERT(0, vreq, "{}", bid[i].to_string()); @@ -403,7 +548,10 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { } } else { // scatter/gather write + const auto& iovecs{std::get< volume_req::IoVecData >(vreq->data)}; + LOGINFO("write: lba={}, nlbas={}, data size/pagesize: {} iovec[0]_len {} buffer{} iovecs.iov_data {} size {}", vreq->lba(), vreq->nlbas(), + data_size/get_page_size(), static_cast< uint64_t >(iovecs.get().at(0).iov_len)/4096, iface_req->buffer, iovecs.get().at(0).iov_base, iovecs.get().size()); const auto write_iovecs{get_next_iovecs(write_transversal, iovecs, data_size)}; // TO DO: Add option to insert into cache if write cache option true @@ -449,7 +597,11 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { } done: - check_and_complete_req(vreq, ret); +// if (!HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ + LOGINFO("done calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); + check_and_complete_req(vreq, ret); +// } + return ret; } @@ -584,6 +736,8 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error vreq->state = volume_req_state::journal_io; vreq->indx_start_time = Clock::now(); auto ireq = boost::static_pointer_cast< indx_req >(vreq); + LOGINFO("complete write? {}: lba={}, nlbas={}, cache={}", vreq->is_write(), vreq->lba(), vreq->nlbas(), + vreq->use_cache()); (vreq->is_unmap()) ? m_indx_mgr->unmap(ireq) : m_indx_mgr->update_indx(ireq); COUNTER_INCREMENT(m_metrics, volume_outstanding_metadata_write_count, 1); } @@ -627,7 +781,12 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error } #endif THIS_VOL_LOG(TRACE, volume, vreq, "IO DONE"); - interface_req_done(vreq->iface_req); + if (vreq->is_write() && HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ + + } + else{ + interface_req_done(vreq->iface_req); + } } shutdown_if_needed(); } @@ -656,7 +815,7 @@ void Volume::process_indx_completions(const indx_req_ptr& ireq, std::error_condi THIS_VOL_LOG(TRACE, volume, vreq, "metadata_complete: status={}", vreq->err().message()); HISTOGRAM_OBSERVE(m_metrics, volume_map_write_latency, get_elapsed_time_us(vreq->indx_start_time)); - + LOGINFO("process_indx_completions calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); check_and_complete_req(vreq, err); } @@ -765,7 +924,7 @@ mapping* Volume::get_active_indx() { void Volume::process_read_indx_completions(const boost::intrusive_ptr< indx_req >& ireq, std::error_condition err) { auto ret = no_error; auto vreq = boost::static_pointer_cast< volume_req >(ireq); - + LOGINFO("process_read_indx_completions calls for check and complete read? {}: lba={}, nlbas={}", vreq->is_read_op(), vreq->lba(), vreq->nlbas()); // if there is error or nothing to read anymore, complete this req; if (err != no_error) { ret = err; @@ -889,6 +1048,7 @@ void Volume::process_read_indx_completions(const boost::intrusive_ptr< indx_req /* It is not lock protected. It should be called only by thread for a vreq */ volume_child_req_ptr Volume::create_vol_child_req(const BlkId& bid, const volume_req_ptr& vreq, const uint64_t start_lba, const lba_count_t nlbas) { + volume_child_req_ptr vc_req = volume_child_req::make_request(); vc_req->parent_req = vreq; vc_req->is_read = vreq->is_read_op(); @@ -899,6 +1059,7 @@ volume_child_req_ptr Volume::create_vol_child_req(const BlkId& bid, const volume vc_req->use_cache = vreq->use_cache(); vc_req->part_of_batch = vreq->iface_req->part_of_batch; vc_req->request_id = vreq->request_id; + LOGINFO("create_vol_child_req calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); assert((bid.data_size(HomeBlks::instance()->get_data_pagesz()) % get_page_size()) == 0); vc_req->nlbas = nlbas; @@ -924,11 +1085,11 @@ sisl::status_response Volume::get_status(const sisl::status_request& request) { auto active_indx_json = get_active_indx()->sobject()->run_callback(request).json; if (!active_indx_json.empty()) { response.json["index"] = active_indx_json; } - response.json["name"] = sobject()->name(); + response.json["name"] = sobject()->name(); response.json["type"] = sobject()->type(); response.json["uuid"] = boost::lexical_cast< std::string >(get_uuid()); response.json["state"] = is_offline() ? "Offline" : "Online"; - response.json["size"]= get_size(); + response.json["size"] = get_size(); return response; } diff --git a/src/homeblks/volume/volume.hpp b/src/homeblks/volume/volume.hpp index 10c2fbbf5..619698370 100644 --- a/src/homeblks/volume/volume.hpp +++ b/src/homeblks/volume/volume.hpp @@ -465,6 +465,12 @@ class Volume : public std::enable_shared_from_this< Volume > { */ std::error_condition write(const vol_interface_req_ptr& hb_req); + /* Write to lba + * @param hb_req :- it expects this request to be created + * @return :- no_error if there is no error. It doesn't throw any exception + */ + std::error_condition write_internal(const vol_interface_req_ptr& hb_req); + /* Read from lba * @param hb_req :- it expects this request to be created * @return :- no_error if there is no error. It doesn't throw any exception @@ -729,7 +735,7 @@ struct volume_req : indx_req { csum_t* j_csum = (csum_t*)mem; if (!is_unmap() && active_nlbas_written != nlbas()) { - VOL_ERROR_LOG(vol()->get_name(), "all lbas are not written. lba written {}, lba supposed to write{}", + VOL_ERROR_LOG(vol()->get_name(), "all lbas are not written. lba written {}, lba supposed to write: {}", active_nlbas_written, nlbas()); } for (lba_count_t i{0}; !is_unmap() && i < active_nlbas_written; ++i) { From d864cb1f82417f1d91a6fd61ad8acdcf4ed8b71c Mon Sep 17 00:00:00 2001 From: shosseinimotlagh Date: Mon, 5 Feb 2024 11:27:59 -0800 Subject: [PATCH 2/3] base is ready Add thin flag Add baseline functions for thin pro --- src/.clang-format | 1 - src/engine/common/homestore_config.fbs | 2 ++ src/homeblks/volume/volume.hpp | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/.clang-format b/src/.clang-format index 2f7712008..fdfa11f5e 100644 --- a/src/.clang-format +++ b/src/.clang-format @@ -18,7 +18,6 @@ AlignOperands: false AlignTrailingComments: true AllowShortBlocksOnASingleLine: true AllowShortIfStatementsOnASingleLine: true -AllowShortBlocksOnASingleLine: true AllowShortCaseLabelsOnASingleLine: false # AllowShortFunctionsOnASingleLine: InlineOnly # AllowShortLoopsOnASingleLine: false diff --git a/src/engine/common/homestore_config.fbs b/src/engine/common/homestore_config.fbs index e6dfee051..9b219abe4 100644 --- a/src/engine/common/homestore_config.fbs +++ b/src/engine/common/homestore_config.fbs @@ -142,6 +142,8 @@ table Generic { // percentage of cache used to create indx mempool. It should be more than 100 to // take into account some floating buffers in writeback cache. indx_mempool_percent : uint32 = 110; + + boot_thin_provisioning: bool = false; } table ResourceLimits { diff --git a/src/homeblks/volume/volume.hpp b/src/homeblks/volume/volume.hpp index 619698370..76a905257 100644 --- a/src/homeblks/volume/volume.hpp +++ b/src/homeblks/volume/volume.hpp @@ -464,6 +464,10 @@ class Volume : public std::enable_shared_from_this< Volume > { * @return :- no_error if there is no error. It doesn't throw any exception */ std::error_condition write(const vol_interface_req_ptr& hb_req); + std::error_condition write_internal(const vol_interface_req_ptr& hb_req); + std::error_condition write_thin_provisioning(const vol_interface_req_ptr& hb_req); + + /* Write to lba * @param hb_req :- it expects this request to be created From 2770a52ee4c59fbad2356ad376bf6351a3d4dd33 Mon Sep 17 00:00:00 2001 From: shosseinimotlagh Date: Fri, 9 Feb 2024 11:23:21 -0800 Subject: [PATCH 3/3] Enable zero detecting for requests Added a manual io job test --- conanfile.py | 2 +- src/api/vol_interface.hpp | 18 +- src/engine/blkalloc/blk.h | 2 +- src/engine/common/homestore_utils.cpp | 53 +++- src/engine/common/homestore_utils.hpp | 2 + src/homeblks/homeblks_config.fbs | 3 - src/homeblks/volume/tests/vol_gtest.cpp | 328 +++++++++++++++++++----- src/homeblks/volume/volume.cpp | 151 ++--------- src/homeblks/volume/volume.hpp | 9 - 9 files changed, 359 insertions(+), 209 deletions(-) diff --git a/conanfile.py b/conanfile.py index 61d77e40d..0fb9488de 100644 --- a/conanfile.py +++ b/conanfile.py @@ -2,7 +2,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "3.7.1" + version = "3.7.2" homepage = "https://github.corp.ebay.com/SDS/homestore" description = "HomeStore" diff --git a/src/api/vol_interface.hpp b/src/api/vol_interface.hpp index 53627911b..7436ee5ea 100644 --- a/src/api/vol_interface.hpp +++ b/src/api/vol_interface.hpp @@ -114,6 +114,22 @@ struct vol_interface_req : public sisl::ObjLifeCounter< vol_interface_req > { bool is_write() const { return op_type == Op_type::WRITE; } bool is_unmap() const { return op_type == Op_type::UNMAP; } + bool is_zero_request(const uint64_t page_size) { + if (iovecs.empty()) { + return !buffer || hs_utils::is_buf_zero(static_cast< uint8_t* >(buffer), nlbas * page_size); + } + return is_iovec_zero(); + } + + bool is_iovec_zero() { + for (const auto& iovec : iovecs) { + auto data = static_cast< uint8_t* >(iovec.iov_base); + const size_t size = iovec.iov_len; + if (!hs_utils::is_buf_zero(data, size)) { return false; } + } + return true; + } + friend void intrusive_ptr_add_ref(vol_interface_req* req) { req->refcount.increment(1); } friend void intrusive_ptr_release(vol_interface_req* req) { @@ -316,7 +332,7 @@ class VolInterface { virtual const char* get_name(const VolumePtr& vol) = 0; virtual uint64_t get_size(const VolumePtr& vol) = 0; - virtual std::map get_used_size(const VolumePtr& vol) = 0; + virtual std::map< boost::uuids::uuid, uint64_t > get_used_size(const VolumePtr& vol) = 0; virtual uint64_t get_page_size(const VolumePtr& vol) = 0; virtual boost::uuids::uuid get_uuid(std::shared_ptr< Volume > vol) = 0; virtual sisl::blob at_offset(const boost::intrusive_ptr< BlkBuffer >& buf, uint32_t offset) = 0; diff --git a/src/engine/blkalloc/blk.h b/src/engine/blkalloc/blk.h index 40262f154..5429f77bf 100644 --- a/src/engine/blkalloc/blk.h +++ b/src/engine/blkalloc/blk.h @@ -102,7 +102,7 @@ struct BlkId { bool operator==(const BlkId& other) noexcept { return (compare(*this, other) == 0); } void invalidate() { set(blk_num_t{0}, blk_count_t{0}, s_chunk_num_mask); } - // return invalid_blk_id() { return blk_count_t{0}; } + [[nodiscard]] bool is_valid() const { return (m_chunk_num != s_chunk_num_mask); } [[nodiscard]] BlkId get_blkid_at(const uint32_t offset, const uint32_t pagesz) const { diff --git a/src/engine/common/homestore_utils.cpp b/src/engine/common/homestore_utils.cpp index fa60cbb3a..c161af0fa 100644 --- a/src/engine/common/homestore_utils.cpp +++ b/src/engine/common/homestore_utils.cpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace homestore { uint8_t* hs_utils::iobuf_alloc(const size_t size, const sisl::buftag tag, const size_t alignment) { @@ -76,6 +77,42 @@ sisl::byte_array hs_utils::extract_byte_array(const sisl::byte_view& b, const bo return (is_aligned_needed) ? b.extract(alignment) : b.extract(0); }; +constexpr unsigned long long operator"" _KB(unsigned long long x) { return x * 1024; } + +constexpr std::array< size_t, 7 > predefined_sizes = {4_KB, 8_KB, 16_KB, 32_KB, 64_KB, 128_KB, 256_KB}; + +// Function to initialize the CRC map with predefined sizes +void initialize_crc_map(std::map< size_t, uint16_t >& crc_map) { + std::vector< uint8_t > zero_buf; + for (auto s : predefined_sizes) { + zero_buf.resize(s, 0); // Resize buffer to the required size, filling with zeros + crc_map[s] = crc16_t10dif(init_crc_16, zero_buf.data(), s); + } +} + +uint16_t hs_utils::crc_zero(const size_t size) { + static std::map< size_t, uint16_t > crc_map; + static std::once_flag init_flag; + + // Thread-safe initialization of the CRC map + std::call_once(init_flag, initialize_crc_map, std::ref(crc_map)); + + // Check if the size is already in the map + if (auto it = crc_map.find(size); it != crc_map.end()) { return it->second; } + + std::vector< uint8_t > zero_buf(size, 0); + return crc16_t10dif(init_crc_16, zero_buf.data(), size); +} + +bool hs_utils::is_buf_zero(const uint8_t* buf, size_t size) { + // TODO: subsample the buffer to detect zero request instead of working on the whole buffer to achieve constant + // processing time for large buffer size requests. Needs to investigate the performance impact of this change + // in end2end testing. + auto zero_crc = crc_zero(size); + const auto crc = crc16_t10dif(init_crc_16, buf, size); + return (crc == zero_crc) ? (buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1)) : false; +} + std::string hs_utils::encodeBase64(const uint8_t* first, std::size_t size) { using Base64FromBinary = boost::archive::iterators::base64_from_binary< boost::archive::iterators::transform_width< const char*, // sequence of chars @@ -90,15 +127,12 @@ std::string hs_utils::encodeBase64(const uint8_t* first, std::size_t size) { return encoded.append(bytes_to_pad, '='); } -std::string hs_utils::encodeBase64(const sisl::byte_view& b){ - return encodeBase64(b.bytes(), b.size()); -} +std::string hs_utils::encodeBase64(const sisl::byte_view& b) { return encodeBase64(b.bytes(), b.size()); } -template -void hs_utils::decodeBase64(const std::string &encoded_data, T out) -{ +template < typename T > +void hs_utils::decodeBase64(const std::string& encoded_data, T out) { using BinaryFromBase64 = boost::archive::iterators::transform_width< - boost::archive::iterators::binary_from_base64, + boost::archive::iterators::binary_from_base64< std::string::const_iterator >, 8, // get a view of 8 bit 6 // from a sequence of 6 bit >; @@ -107,14 +141,13 @@ void hs_utils::decodeBase64(const std::string &encoded_data, T out) std::replace(begin(unpadded_data), end(unpadded_data), '=', 'A'); // A_64 == \0 std::string decoded_data{BinaryFromBase64{begin(unpadded_data)}, - BinaryFromBase64{begin(unpadded_data) + unpadded_data.length()}}; + BinaryFromBase64{begin(unpadded_data) + unpadded_data.length()}}; decoded_data.erase(end(decoded_data) - bytes_to_pad, end(decoded_data)); std::copy(begin(decoded_data), end(decoded_data), out); } -std::string hs_utils::decodeBase64(const std::string &encoded_data) -{ +std::string hs_utils::decodeBase64(const std::string& encoded_data) { std::string rv; decodeBase64(encoded_data, std::back_inserter(rv)); return rv; diff --git a/src/engine/common/homestore_utils.hpp b/src/engine/common/homestore_utils.hpp index b1313df96..de081d7f8 100644 --- a/src/engine/common/homestore_utils.hpp +++ b/src/engine/common/homestore_utils.hpp @@ -38,6 +38,8 @@ class hs_utils { static sisl::byte_array make_byte_array(const uint64_t size, const bool is_aligned_needed, const sisl::buftag tag, const size_t alignment); static hs_uuid_t gen_system_uuid(); + static uint16_t crc_zero(const size_t size); + static bool is_buf_zero(const uint8_t* buf, size_t size); static std::string encodeBase64(const uint8_t* first, std::size_t size); static std::string encodeBase64(const sisl::byte_view& b); template static void decodeBase64(const std::string &encoded_data, T out); diff --git a/src/homeblks/homeblks_config.fbs b/src/homeblks/homeblks_config.fbs index a99da66d2..42566a9c4 100644 --- a/src/homeblks/homeblks_config.fbs +++ b/src/homeblks/homeblks_config.fbs @@ -34,9 +34,6 @@ table GeneralConfig { // These fields should only be changed by agent through workflow boot_restricted_mode: bool = false; boot_safe_mode: bool = false; - - // This field is for enabling thin provisioing on booting - boot_thin_provisioning: bool = true; } table HomeBlksSettings { diff --git a/src/homeblks/volume/tests/vol_gtest.cpp b/src/homeblks/volume/tests/vol_gtest.cpp index 932b2fa7f..8f31a501e 100644 --- a/src/homeblks/volume/tests/vol_gtest.cpp +++ b/src/homeblks/volume/tests/vol_gtest.cpp @@ -174,6 +174,9 @@ struct TestCfg { uint32_t p_vol_files_space; std::string flip_name; std::string vol_copy_file_path; + uint32_t p_zero_buffer; + uint32_t zero_buffer_period; + bool thin_provision_enable{false}; bool verify_csum() { return verify_type == verify_type_t::csum; } bool verify_data() { return verify_type == verify_type_t::data; } @@ -575,6 +578,7 @@ class VolTest : public ::testing::Test { friend class VolCreateDeleteJob; friend class IOTestJob; friend class VolVerifyJob; + friend class IOManualTestJob; protected: std::atomic< size_t > outstanding_ios; @@ -620,12 +624,20 @@ class VolTest : public ::testing::Test { // vol_create_del_test = false; // move_verify_to_done = false; print_startTime = Clock::now(); + if (tcfg.thin_provision_enable) { + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = true; }); + HS_SETTINGS_FACTORY().save(); + } // outstanding_ios = 0; } virtual ~VolTest() override { if (init_buf) { iomanager.iobuf_free(static_cast< uint8_t* >(init_buf)); } + if (tcfg.thin_provision_enable) { + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = false; }); + HS_SETTINGS_FACTORY().save(); + } } VolTest(const VolTest&) = delete; @@ -1675,8 +1687,7 @@ class IOTestJob : public TestJob { // lba: [0, max_vol_blks - max_blks) std::uniform_int_distribution< uint64_t > lba_random{0, vinfo->max_vol_blks - max_blks - 1}; // nlbas: [1, max_blks] -// std::uniform_int_distribution< uint32_t > nlbas_random{1, max_blks}; - std::uniform_int_distribution< uint32_t > nlbas_random{1, 5}; + std::uniform_int_distribution< uint32_t > nlbas_random{1, max_blks}; // we won't be writing more then 128 blocks in one io uint32_t attempt{1}; @@ -1816,22 +1827,22 @@ class IOTestJob : public TestJob { const uint64_t page_size{VolInterface::get_instance()->get_page_size(vol)}; const uint64_t size{nlbas * page_size}; + static std::atomic< uint32_t > remaining_period{tcfg.zero_buffer_period}; + uint32_t zero_counts_per_period = tcfg.p_zero_buffer * tcfg.zero_buffer_period / 100; boost::intrusive_ptr< io_req_t > vreq{}; - - static thread_local std::random_device rd{}; - static thread_local std::default_random_engine engine{rd()}; - static thread_local std::uniform_int_distribution< uint8_t > dist{0, 1}; - if (tcfg.write_cache) { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; HS_REL_ASSERT_NOTNULL(wbuf); populate_buf(wbuf, size, lba, vinfo.get()); - populate_zero_buf(wbuf, size, vinfo.get()); + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + populate_zero_buf(wbuf, size); + } vreq = boost::intrusive_ptr< io_req_t >( new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); } else { - static bool send_iovec{false}; + static bool send_iovec{true}; std::vector< iovec > iovecs{}; if (send_iovec) { for (uint32_t lba_num{0}; lba_num < nlbas; ++lba_num) { @@ -1840,7 +1851,14 @@ class IOTestJob : public TestJob { iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)}; iovecs.emplace_back(std::move(iov)); populate_buf(wbuf, page_size, lba + lba_num, vinfo.get()); - populate_zero_buf(wbuf, size, vinfo.get()); + } + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + for (const auto& iovec : iovecs) { + auto data = static_cast< uint8_t* >(iovec.iov_base); + const size_t size = iovec.iov_len; + populate_zero_buf(data, size); + } } vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba, @@ -1848,13 +1866,17 @@ class IOTestJob : public TestJob { } else { uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; populate_buf(wbuf, size, lba, vinfo.get()); - populate_zero_buf(wbuf, size, vinfo.get()); + if (HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning) && + remaining_period.fetch_sub(1) < zero_counts_per_period) { + populate_zero_buf(wbuf, size); + } HS_REL_ASSERT_NOTNULL(wbuf); vreq = boost::intrusive_ptr< io_req_t >{ new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)}; } - // send_iovec = !send_iovec; + if (remaining_period.load() == 0) { remaining_period.store(tcfg.zero_buffer_period); } + send_iovec = !send_iovec; } vreq->cookie = static_cast< void* >(this); @@ -1869,40 +1891,6 @@ class IOTestJob : public TestJob { return true; } - void populate_zero_buf(uint8_t* buf, const uint64_t size, const vol_info_t* const vinfo) { - auto page_size = VolInterface::get_instance()->get_page_size(vinfo->vol); - auto nlbas = size / page_size; - static thread_local std::random_device rd{}; - static thread_local std::default_random_engine engine{rd()}; - static thread_local std::uniform_int_distribution< uint8_t > dist{0, 100}; -// std::fill_n(buf + nlbas/2 * page_size, page_size, 0); -// { -// // first zero -// std::fill_n(buf, page_size, 0); -// } - { - // first x lbas the non_zero the rest zero - - if (nlbas >= 2) - std::fill_n(buf + page_size, (nlbas -1) *page_size, 0); - } -// { -// // randomly 5% of lbas can be zero -// for (long unsigned int i = 0; i < nlbas; ++i) { -// if (dist(engine) < 5) { std::fill_n(buf + i * page_size, page_size, 0); } -// } -// } -// { -// // one lba in the middle can be zero (two sub non empty ranges) -// std::uniform_int_distribution< uint8_t > ran_lba{1, nlbas-1}; -// auto l1= ran_lba(engine); -// auto l2= ran_lba(engine); -// auto lb1 = std::min(l1,l2); -// auto lb2 = std::max(l1,l2); -// std::fill_n(buf + l1 * page_size, (lb2 -lb1 +1) *page_size, 0); -// } - } - void populate_buf(uint8_t* const buf, const uint64_t size, const uint64_t lba, const vol_info_t* const vinfo) { static thread_local std::random_device rd{}; static thread_local std::default_random_engine engine{rd()}; @@ -1921,8 +1909,9 @@ class IOTestJob : public TestJob { } } + void populate_zero_buf(uint8_t* buf, const uint64_t size) { std::fill_n(buf, size, 0); } + bool read_vol(const uint32_t cur, const uint64_t lba, const uint32_t nlbas) { - return true; const auto vinfo{m_voltest->m_vol_info[cur]}; const auto vol{vinfo->vol}; if (vol == nullptr) { return false; } @@ -2000,8 +1989,6 @@ class IOTestJob : public TestJob { } bool verify(const boost::intrusive_ptr< io_req_t >& req, const bool can_panic = true) const { - return true; -#if 0 const auto& vol_req{static_cast< vol_interface_req_ptr >(req)}; const auto verify_buffer{[this, &req, &can_panic](const uint8_t* const validate_buffer, @@ -2107,9 +2094,200 @@ class IOTestJob : public TestJob { tcfg.verify_csum() ? (HS_REL_ASSERT_EQ(total_size_read_csum, req->verify_size)) : (HS_REL_ASSERT_EQ(total_size_read, req->original_size)); return true; -#endif } +}; + +// This test job is used to test the IOs with manual requests. For sake of simplicity, we will use the same volume for +// all requests. The caller needs to load the requests before starting the job. The requests are loaded in the form of +// Write with three or four parameters and Read with three parameters. The value is optional and is used only for write +// requests. +class IOManualTestJob : public TestJob { +public: + using TupleVariant = std::variant< std::tuple< std::string, uint64_t, uint32_t >, + std::tuple< std::string, uint64_t, uint32_t, uint8_t > >; + using RequestVector = std::vector< IOManualTestJob::TupleVariant >; + IOManualTestJob(VolTest* const test) : TestJob(test, 1, true) { + vol = m_voltest->m_vol_info[0]->vol; + vinfo = m_voltest->m_vol_info[0]; + page_size = VolInterface::get_instance()->get_page_size(vol); + const auto vol_size = VolInterface::get_instance()->get_size(vol); + const auto max_lbas = vol_size / page_size; + m_validate_buf.resize(max_lbas); + std::fill(m_validate_buf.begin(), m_validate_buf.end(), 0); + LOGINFO("Manual volume size {} max_lbas {}", vol_size, max_lbas); + } + virtual ~IOManualTestJob() override = default; + IOManualTestJob(const IOManualTestJob&) = delete; + IOManualTestJob(IOManualTestJob&&) noexcept = delete; + IOManualTestJob& operator=(const IOManualTestJob&) = delete; + IOManualTestJob& operator=(IOManualTestJob&&) noexcept = delete; + + virtual void run_one_iteration() override { + if (m_outstanding_ios.load() == 0 && m_current_request < m_requests.size()) { + const auto& request = m_requests[m_current_request]; + if (std::holds_alternative< std::tuple< std::string, uint64_t, uint32_t > >(request)) { + auto& tuple = std::get< std::tuple< std::string, uint64_t, uint32_t > >(request); + auto start_lba = std::get< 1 >(tuple); + auto nlbas = std::get< 2 >(tuple); + if (std::get< 0 >(tuple) == "write") { + write_vol(start_lba, nlbas); + auto it = m_validate_buf.begin() + start_lba; + std::fill(it, it + nlbas, 0); + } else { + read_vol(start_lba, nlbas); + } + } else if (std::holds_alternative< std::tuple< std::string, uint64_t, uint32_t, uint8_t > >(request)) { + auto& tuple = std::get< std::tuple< std::string, uint64_t, uint32_t, uint8_t > >(request); + auto start_lba = std::get< 1 >(tuple); + auto nlbas = std::get< 2 >(tuple); + auto value = std::get< 3 >(tuple); + if (std::get< 0 >(tuple) == "write") { + write_vol(start_lba, nlbas, value); + auto it = m_validate_buf.begin() + start_lba; + std::fill(it, it + nlbas, value); + } else { + // in case, the caller mistakenly added a value for a read request, we will ignore the value + read_vol(start_lba, nlbas); + } + } + } + } + + void on_one_iteration_completed(const boost::intrusive_ptr< io_req_t >& req) override { + --m_outstanding_ios; + if (req->op_type == Op_type::READ) { verify_request(req); } + req->vol_info->ref_cnt.decrement_testz(1); + } + uint64_t read_buffer(std::vector< iovec >& iovecs, uint8_t* buf) { + uint8_t* current_position = buf; + for (const auto& iov : iovecs) { + std::memcpy(current_position, iov.iov_base, iov.iov_len); + current_position += iov.iov_len; + } + return static_cast< uint64_t >(current_position - buf); + } + void verify_request(const boost::intrusive_ptr< io_req_t >& req) { + std::shared_ptr< uint8_t > buf(new uint8_t[req->nlbas * page_size]); + std::fill_n(buf.get(), req->nlbas * page_size, 0); + auto total_size_read = read_buffer(req->iovecs, buf.get()); + HS_REL_ASSERT_EQ(req->nlbas * page_size, total_size_read); + auto raw_buf = buf.get(); + for (size_t i = 0; i < req->nlbas; i++) { + HS_REL_ASSERT_EQ(raw_buf[i * page_size], m_validate_buf[req->lba + i]); + } + } + bool time_to_stop() const override { return m_current_request == m_requests.size(); } + + virtual bool is_job_done() const override { return (m_outstanding_ios == 0); } + bool is_async_job() const override { return true; } + std::string job_name() const { return "IO Manual Job"; } + void load_requests(RequestVector& requests) { m_requests = requests; } + +protected: + VolumePtr vol; + std::shared_ptr< vol_info_t > vinfo; + uint64_t page_size; + std::atomic< uint64_t > m_outstanding_ios{0}; + std::atomic< uint64_t > m_current_request{0}; + std::vector< uint8_t > m_validate_buf; + RequestVector m_requests; + + bool write_vol(const uint64_t lba, const uint32_t nlbas, const uint8_t value = 0) { + ++m_current_request; + ++m_outstanding_ios; + const uint64_t size{nlbas * page_size}; + boost::intrusive_ptr< io_req_t > vreq{}; + if (tcfg.write_cache) { + uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; + populate_buf(wbuf, size, value); + vreq = boost::intrusive_ptr< io_req_t >( + new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)); + } else { + static bool send_iovec{true}; + std::vector< iovec > iovecs{}; + if (send_iovec) { + for (uint32_t lba_num{0}; lba_num < nlbas; ++lba_num) { + uint8_t* const wbuf{iomanager.iobuf_alloc(512, page_size)}; + iovec iov{static_cast< void* >(wbuf), static_cast< size_t >(page_size)}; + iovecs.emplace_back(std::move(iov)); + populate_buf(wbuf, page_size, value); + } + vreq = boost::intrusive_ptr< io_req_t >(new io_req_t(vinfo, Op_type::WRITE, std::move(iovecs), lba, + nlbas, tcfg.verify_csum(), tcfg.write_cache)); + } else { + uint8_t* const wbuf{iomanager.iobuf_alloc(512, size)}; + populate_buf(wbuf, size, value); + vreq = boost::intrusive_ptr< io_req_t >{ + new io_req_t(vinfo, Op_type::WRITE, wbuf, lba, nlbas, tcfg.verify_csum(), tcfg.write_cache)}; + } + send_iovec = !send_iovec; + } + vreq->cookie = static_cast< void* >(this); + ++m_voltest->output.write_cnt; + vinfo->ref_cnt.increment(1); + const auto ret_io{VolInterface::get_instance()->write(vol, vreq)}; + LOGDEBUG("Wrote lba: {}, nlbas: {} outstanding_ios={}, iovec(s)={}, cache={}", lba, nlbas, + m_outstanding_ios.load(), (tcfg.write_iovec != 0 ? true : false), + (tcfg.write_cache != 0 ? true : false)); + if (ret_io != no_error) { return false; } + return true; + } + + void populate_buf(uint8_t* buf, const uint64_t size, const uint8_t value = 0) { std::fill_n(buf, size, value); } + + bool read_vol(const uint64_t lba, const uint32_t nlbas) { + ++m_current_request; + if (read_vol_internal(vinfo, vol, lba, nlbas, false)) { return true; } + return false; + } + + boost::intrusive_ptr< io_req_t > read_vol_internal(std::shared_ptr< vol_info_t > vinfo, VolumePtr vol, + const uint64_t lba, const uint32_t nlbas, + const bool sync = false) { + boost::intrusive_ptr< io_req_t > vreq{}; + if (tcfg.read_cache) { + vreq = boost::intrusive_ptr< io_req_t >{ + new io_req_t{vinfo, Op_type::READ, nullptr, lba, nlbas, tcfg.verify_csum(), tcfg.read_cache, sync}}; + } else { + static bool send_iovec{true}; + if (send_iovec) { + std::vector< iovec > iovecs{}; + for (uint32_t lba_num{0}; lba_num < nlbas; ++lba_num) { + uint8_t* const rbuf{iomanager.iobuf_alloc(512, page_size)}; + std::memset(static_cast< void* >(rbuf), 0, page_size); + + HS_REL_ASSERT_NOTNULL(rbuf); + iovec iov{static_cast< void* >(rbuf), static_cast< size_t >(page_size)}; + iovecs.emplace_back(std::move(iov)); + } + + vreq = boost::intrusive_ptr< io_req_t >{new io_req_t{vinfo, Op_type::READ, std::move(iovecs), lba, + nlbas, tcfg.verify_csum(), tcfg.read_cache, sync}}; + } else { + uint8_t* const rbuf{iomanager.iobuf_alloc(512, nlbas * page_size)}; + std::memset(static_cast< void* >(rbuf), 0, nlbas * page_size); + vreq = boost::intrusive_ptr< io_req_t >{ + new io_req_t{vinfo, Op_type::READ, rbuf, lba, nlbas, tcfg.verify_csum(), tcfg.read_cache, sync}}; + } + send_iovec = !send_iovec; + } + vreq->cookie = static_cast< void* >(this); + + ++m_voltest->output.read_cnt; + ++m_outstanding_ios; + vinfo->ref_cnt.increment(1); + const auto ret_io{VolInterface::get_instance()->read(vol, vreq)}; + LOGDEBUG("Read lba: {}, nlbas: {} outstanding_ios={}, iovec(s)={}, cache={}", lba, nlbas, + m_outstanding_ios.load(), (tcfg.read_iovec != 0 ? true : false), + (tcfg.read_cache != 0 ? true : false)); + if (sync) { + --m_outstanding_ios; + vinfo->ref_cnt.decrement(1); + } + if (ret_io != no_error) { return nullptr; } + return vreq; + } }; class VolVerifyJob : public IOTestJob { @@ -2269,20 +2447,42 @@ TEST_F(VolTest, init_io_test) { this->shutdown(); if (tcfg.remove_file_on_shutdown) { this->remove_files(); } } - TEST_F(VolTest, thin_test) { + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = true; }); + HS_SETTINGS_FACTORY().save(); + tcfg.max_vols = 1; + tcfg.verify_type = static_cast< verify_type_t >(3); + tcfg.max_disk_capacity = 1 * (1ul << 30); // 1GB + tcfg.p_volume_size = 1; // 1% of 2 (devices) * 1G = 20 MB volume + output.print("thin_test"); + this->start_homestore(); - std::unique_ptr< VolCreateDeleteJob > cdjob; - if (tcfg.create_del_with_io || tcfg.delete_with_io) { - cdjob = std::make_unique< VolCreateDeleteJob >(this); - this->start_job(cdjob.get(), wait_type::no_wait); - } - this->start_io_job(); - output.print("init_io_test"); + std::unique_ptr< IOManualTestJob > job; + job = std::make_unique< IOManualTestJob >(this); + // request = op=[write|read], lba, nlbas [value], value is optional and is used only for write requests and If not + // provided, it defaults to 0. + IOManualTestJob::RequestVector reqs = { + // Case one: normal read (no zero padding) + std::make_tuple("write", 0, 100, 4), std::make_tuple("read", 5, 20), + // Case two: zero padding, read after write + std::make_tuple("write", 1, 10), std::make_tuple("read", 1, 20), std::make_tuple("read", 5, 3), + // Case three: zero padding, overlapping for read + std::make_tuple("write", 100, 200), std::make_tuple("read", 150, 250), + // Case four: no write + std::make_tuple("read", 800, 5)}; + job->load_requests(reqs); + + this->start_job(job.get(), wait_type::for_completion); - if (tcfg.create_del_with_io || tcfg.delete_with_io) { cdjob->wait_for_completion(); } + LOGINFO("All volumes are deleted, do a shutdown of homestore"); this->shutdown(); + + LOGINFO("Shutdown of homestore is completed, removing files"); + this->remove_files(); + + HS_SETTINGS_FACTORY().modifiable_settings([](auto& s) { s.generic.boot_thin_provisioning = false; }); + HS_SETTINGS_FACTORY().save(); } /*! @@ -2743,6 +2943,13 @@ SISL_OPTION_GROUP( (io_size, "", "io_size", "io size in KB", ::cxxopts::value< uint32_t >()->default_value("4"), "io_size"), (vol_copy_file_path, "", "vol_copy_file_path", "file path for copied volume", ::cxxopts::value< std::string >()->default_value(""), "path [...]"), + (p_zero_buffer, "", "p_zero_buffer", + "percentage of zero buffer occurrence for testing thin provisioning within period", + ::cxxopts::value< uint32_t >()->default_value("70"), "0 to 100"), + (zero_buffer_period, "", "zero_buffer_period", " the period of consecutive zero buffer occurrence", + ::cxxopts::value< uint32_t >()->default_value("100"), "0 to 100"), + (thin_provision_enable, "", "thin_provision_enable", " enable thin provisioning", + ::cxxopts::value< uint32_t >()->default_value("0"), "flag"), (unmap_frequency, "", "unmap_frequency", "do unmap for every N", ::cxxopts::value< uint64_t >()->default_value("100"), "unmap_frequency")) @@ -2819,6 +3026,9 @@ int main(int argc, char* argv[]) { gcfg.app_mem_size_in_gb = SISL_OPTIONS["app_mem_size_in_gb"].as< uint32_t >(); gcfg.vol_copy_file_path = SISL_OPTIONS["vol_copy_file_path"].as< std::string >(); const auto io_size_in_kb = SISL_OPTIONS["io_size"].as< uint32_t >(); + gcfg.p_zero_buffer = SISL_OPTIONS["p_zero_buffer"].as< uint32_t >(); + gcfg.zero_buffer_period = SISL_OPTIONS["zero_buffer_period"].as< uint32_t >(); + gcfg.thin_provision_enable = SISL_OPTIONS["thin_provision_enable"].as< uint32_t >() != 0 ? true : false; gcfg.io_size = io_size_in_kb * 1024; HS_REL_ASSERT(io_size_in_kb && (io_size_in_kb % 4 == 0), diff --git a/src/homeblks/volume/volume.cpp b/src/homeblks/volume/volume.cpp index 5220c455f..ec2bcef49 100644 --- a/src/homeblks/volume/volume.cpp +++ b/src/homeblks/volume/volume.cpp @@ -334,50 +334,19 @@ indx_tbl* Volume::recover_indx_tbl(btree_super_block& sb, btree_cp_sb& cp_info) SnapMgr::add_read_tracker, &cp_info); return static_cast< indx_tbl* >(tbl); } -static std::vector< bool > find_non_zero_data(const uint8_t* buf, size_t size, uint32_t nlbas) { - std::vector< bool > empty_blocks; - - auto is_buf_empty = [](const uint8_t* buf, size_t size) -> bool { - return buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1); - }; - for (uint32_t count{0}; count < nlbas; ++count) { - empty_blocks.push_back(!is_buf_empty(buf, size)); - buf += size; - } - return empty_blocks; -} -static std::vector< std::pair< int, int > > get_true_intervals(const std::vector< bool >& empty_blocks) { - std::vector< std::pair< int, int > > result; - - int start = -1; - for (std::size_t i = 0; i < empty_blocks.size(); ++i) { - if (empty_blocks[i]) { - if (start == -1) { start = i; } - } else { - if (start != -1) { - result.emplace_back(start, i - start); - start = -1; - } - } - } - - if (start != -1) { result.emplace_back(start, empty_blocks.size() - start); } - - return result; -} +#if 0 +// TODO: use these functions for near future optimization of write path for thin provisioning volumes to enable skipping +// writing empty blocks in subrange intervals for requested buffer instead of detecting the all-zero-buffer requests. static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_t* buf, size_t page_size, uint32_t nlbas, bool empty_blocks = false) { std::vector< std::pair< int, int > > intervals; bool in_empty_region = false; int current_range_start = -1; int current_range_length = 1; - auto is_buf_empty = [](const uint8_t* buf, size_t size) -> bool { - return buf[0] == 0 && !std::memcmp(buf, buf + 1, size - 1); - }; for (uint32_t i = 0; i < nlbas; i++) { const uint8_t* page_start = buf + (i * page_size); - bool is_page_empty = (empty_blocks == is_buf_empty(page_start, page_size)); + bool is_page_empty = (empty_blocks == is_buf_zero(page_start, page_size)); if (is_page_empty) { if (!in_empty_region) { current_range_start = i; @@ -395,16 +364,7 @@ static std::vector< std::pair< int, int > > compute_range_intervals(const uint8_ return intervals; } -std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { - if (!HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ - return write_internal(iface_req); - } - std::error_condition ret{no_error}; - auto buf = static_cast< uint8_t* >(iface_req->buffer); - auto nlbas = iface_req->nlbas; - auto start_lba = iface_req->lba; - auto non_empty_blocks = compute_range_intervals(buf, get_page_size(), nlbas, false); -// auto vreq = volume_req::make(iface_req); +static std::string print_ranges(lba_t start_lba, const std::vector< std::pair< int, int > >& intervals) { auto intervals_to_string = [start_lba](const std::vector< std::pair< int, int > >& intervals) -> std::string { std::vector< std::string > result_strings; std::transform(intervals.begin(), intervals.end(), std::back_inserter(result_strings), @@ -416,67 +376,28 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { }); return std::accumulate(result_strings.begin(), result_strings.end(), std::string("")); }; - LOGINFO("original req <{}, {}> => [{}]", iface_req->lba, iface_req->nlbas, intervals_to_string(non_empty_blocks)); - for (const auto &interval : non_empty_blocks) { -//#if 0 - iface_req->lba = start_lba + interval.first; - iface_req->nlbas = interval.second; - iface_req->buffer = buf + (interval.first * get_page_size()); - iface_req->iovecs.clear(); - - - ret = write_internal(iface_req); - if (ret != no_error) { - return ret; - } -//#endif -#if 0 - auto lba = start_lba + interval.first; - auto nlbas = interval.second; - const auto buffer = buf + (interval.first * get_page_size()); - auto req = std::make_unique(buffer, lba, nlbas, iface_req->sync, iface_req->cache); - - req->vol_instance = shared_from_this(); - req->part_of_batch = iface_req->part_of_batch; - req->op_type = Op_type::WRITE; - LOGINFO("sending request to write_internal with lba: {}, nlbas: {} buffer :{}", req->lba, req->nlbas, req->buffer); - //extra - req->read_buf_list = iface_req->read_buf_list; - req->err = iface_req->err; - req->request_id = iface_req->request_id; - req->cache = iface_req->cache; - req->sync = iface_req->sync; - req->is_fail_completed = iface_req->is_fail_completed.load(); - req->cookie = iface_req->cookie; - - ret = write_internal(req.get()); - for (auto x: iface_req->read_buf_list) { - req->read_buf_list.push_back(x); - - } - for (auto p: iface_req->iovecs) { - req->iovecs.push_back(p); + return intervals_to_string(intervals); +} +#endif +std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { + std::error_condition ret{no_error}; + if (!HS_DYNAMIC_CONFIG(generic->boot_thin_provisioning)) { + return write_internal(iface_req); + } else { + if (iface_req->is_zero_request(get_page_size())) { + THIS_VOL_LOG(TRACE, volume, iface_req, "zero request <{}, {}>", iface_req->lba, iface_req->nlbas); + iface_req->op_type = Op_type::UNMAP; + ret = unmap(iface_req); + } else { + ret = write_internal(iface_req); } - // vol_interface_req i_req(buffer, start_lba, nlbas, iface_req->sync, iface_req->cache); -// i_req.request_id = iface_req->request_id; -// auto ret = write_internal(&i_req); -// if (ret != no_error) { -// return ret; -// } -#endif } - iface_req->buffer = (void*)(buf); - iface_req->nlbas = nlbas; - iface_req->lba = start_lba; -// check_and_complete_req(vreq, ret); -// interface_req_done(iface_req); + iface_req->op_type = Op_type::WRITE; return ret; } -//std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { -// -//} - std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) { + +std::error_condition Volume::write_internal(const vol_interface_req_ptr& iface_req) { static thread_local std::vector< BlkId > bid{}; std::error_condition ret{no_error}; @@ -485,9 +406,6 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { auto vreq = volume_req::make(iface_req); THIS_VOL_LOG(TRACE, volume, vreq, "write: lba={}, nlbas={}, cache={}", vreq->lba(), vreq->nlbas(), vreq->use_cache()); - LOGINFO("\nwrite: lba={}, nlbas={}, cache={} buffer= {}", vreq->lba(), vreq->nlbas(), - vreq->use_cache(), iface_req->buffer); - print_tree(); COUNTER_INCREMENT(m_metrics, volume_outstanding_data_write_count, 1); // Sanity checks @@ -515,7 +433,6 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { uint64_t start_lba{vreq->lba()}; for (size_t i{0}; i < bid.size(); ++i) { - LOGINFO("bid[{}]: {}", i, bid[i].to_string()); if (bid[i].get_nblks() == 0) { // It should not happen. But it happened once so adding a safe check in case it happens again VOL_LOG_ASSERT(0, vreq, "{}", bid[i].to_string()); @@ -548,10 +465,7 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { } } else { // scatter/gather write - const auto& iovecs{std::get< volume_req::IoVecData >(vreq->data)}; - LOGINFO("write: lba={}, nlbas={}, data size/pagesize: {} iovec[0]_len {} buffer{} iovecs.iov_data {} size {}", vreq->lba(), vreq->nlbas(), - data_size/get_page_size(), static_cast< uint64_t >(iovecs.get().at(0).iov_len)/4096, iface_req->buffer, iovecs.get().at(0).iov_base, iovecs.get().size()); const auto write_iovecs{get_next_iovecs(write_transversal, iovecs, data_size)}; // TO DO: Add option to insert into cache if write cache option true @@ -597,11 +511,7 @@ std::error_condition Volume::write(const vol_interface_req_ptr& iface_req) { } done: -// if (!HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ - LOGINFO("done calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); - check_and_complete_req(vreq, ret); -// } - + check_and_complete_req(vreq, ret); return ret; } @@ -736,8 +646,6 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error vreq->state = volume_req_state::journal_io; vreq->indx_start_time = Clock::now(); auto ireq = boost::static_pointer_cast< indx_req >(vreq); - LOGINFO("complete write? {}: lba={}, nlbas={}, cache={}", vreq->is_write(), vreq->lba(), vreq->nlbas(), - vreq->use_cache()); (vreq->is_unmap()) ? m_indx_mgr->unmap(ireq) : m_indx_mgr->update_indx(ireq); COUNTER_INCREMENT(m_metrics, volume_outstanding_metadata_write_count, 1); } @@ -781,12 +689,7 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error } #endif THIS_VOL_LOG(TRACE, volume, vreq, "IO DONE"); - if (vreq->is_write() && HB_DYNAMIC_CONFIG(general_config->boot_thin_provisioning)){ - - } - else{ - interface_req_done(vreq->iface_req); - } + interface_req_done(vreq->iface_req); } shutdown_if_needed(); } @@ -815,7 +718,7 @@ void Volume::process_indx_completions(const indx_req_ptr& ireq, std::error_condi THIS_VOL_LOG(TRACE, volume, vreq, "metadata_complete: status={}", vreq->err().message()); HISTOGRAM_OBSERVE(m_metrics, volume_map_write_latency, get_elapsed_time_us(vreq->indx_start_time)); - LOGINFO("process_indx_completions calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); + check_and_complete_req(vreq, err); } @@ -924,7 +827,7 @@ mapping* Volume::get_active_indx() { void Volume::process_read_indx_completions(const boost::intrusive_ptr< indx_req >& ireq, std::error_condition err) { auto ret = no_error; auto vreq = boost::static_pointer_cast< volume_req >(ireq); - LOGINFO("process_read_indx_completions calls for check and complete read? {}: lba={}, nlbas={}", vreq->is_read_op(), vreq->lba(), vreq->nlbas()); + // if there is error or nothing to read anymore, complete this req; if (err != no_error) { ret = err; @@ -1048,7 +951,6 @@ void Volume::process_read_indx_completions(const boost::intrusive_ptr< indx_req /* It is not lock protected. It should be called only by thread for a vreq */ volume_child_req_ptr Volume::create_vol_child_req(const BlkId& bid, const volume_req_ptr& vreq, const uint64_t start_lba, const lba_count_t nlbas) { - volume_child_req_ptr vc_req = volume_child_req::make_request(); vc_req->parent_req = vreq; vc_req->is_read = vreq->is_read_op(); @@ -1059,7 +961,6 @@ volume_child_req_ptr Volume::create_vol_child_req(const BlkId& bid, const volume vc_req->use_cache = vreq->use_cache(); vc_req->part_of_batch = vreq->iface_req->part_of_batch; vc_req->request_id = vreq->request_id; - LOGINFO("create_vol_child_req calls for check and complete write? {}: lba={}, nlbas={}", vreq->is_write(), vreq->lba(), vreq->nlbas()); assert((bid.data_size(HomeBlks::instance()->get_data_pagesz()) % get_page_size()) == 0); vc_req->nlbas = nlbas; diff --git a/src/homeblks/volume/volume.hpp b/src/homeblks/volume/volume.hpp index 76a905257..2617dddc7 100644 --- a/src/homeblks/volume/volume.hpp +++ b/src/homeblks/volume/volume.hpp @@ -465,15 +465,6 @@ class Volume : public std::enable_shared_from_this< Volume > { */ std::error_condition write(const vol_interface_req_ptr& hb_req); std::error_condition write_internal(const vol_interface_req_ptr& hb_req); - std::error_condition write_thin_provisioning(const vol_interface_req_ptr& hb_req); - - - - /* Write to lba - * @param hb_req :- it expects this request to be created - * @return :- no_error if there is no error. It doesn't throw any exception - */ - std::error_condition write_internal(const vol_interface_req_ptr& hb_req); /* Read from lba * @param hb_req :- it expects this request to be created