Skip to content

Commit

Permalink
Meshlet Occlusion Culling (#78)
Browse files Browse the repository at this point in the history
- record visibility per meshlet
- required some improvements and restructuring throughout
- perform occlusion-culling in task-shader
  • Loading branch information
crocdialer authored Oct 25, 2024
1 parent 4e2965a commit 96fc668
Show file tree
Hide file tree
Showing 22 changed files with 663 additions and 231 deletions.
19 changes: 13 additions & 6 deletions include/vierkant/Rasterizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@ using double_millisecond_t = std::chrono::duration<double, std::milli>;
/**
* @brief Rasterizer can be used to run arbitrary rasterization/graphics pipelines.
*
* It will not render anything on its own, only record secondary command-buffers,
* meant to be executed within an existing renderpass.
* It will not submit anything on its own, only record drawing commands into command-buffers.
*
* Required resources like descriptor-sets and uniform-buffers will be created
* and kept alive, depending on the requested number of in-flight (pending) frames.
* Required resources like descriptor-sets and uniform-buffers will be created
* and kept alive, depending on the requested number of in-flight (pending) frames.
*
* Renderer is NOT thread-safe, with the exception of stage_drawables(...).
* Renderer is NOT thread-safe, with the exception of stage_drawables(...).
*/
class Rasterizer
{
Expand All @@ -53,6 +52,8 @@ class Rasterizer
BINDING_MESHLETS = 13,
BINDING_MESHLET_VERTICES = 14,
BINDING_MESHLET_TRIANGLES = 15,
BINDING_MESHLET_VISIBILITY = 16,
BINDING_DEPTH_PYRAMID = 17,
BINDING_MAX_RANGE
};

Expand All @@ -76,16 +77,18 @@ class Rasterizer
vierkant::Mesh::lod_t lods[8];
};

struct indexed_indirect_command_t
struct alignas(16) indexed_indirect_command_t
{
VkDrawIndexedIndirectCommand vk_draw = {};// size: 5

VkDrawMeshTasksIndirectCommandEXT vk_mesh_draw = {};// size: 3

uint32_t visible = false;
uint32_t late_visible = false;
uint32_t object_index = 0;
uint32_t base_meshlet = 0;
uint32_t num_meshlets = 0;
uint32_t meshlet_visibility_index = 0;
uint32_t count_buffer_offset = 0;
uint32_t first_draw_index = 0;
};
Expand All @@ -104,6 +107,9 @@ class Rasterizer
//! device array containing any array of material_t
vierkant::BufferPtr materials;

//! device array containing a visibility bitfield for all meshlets
vierkant::BufferPtr meshlet_visibilities;

//! host-visible array of indexed_indirect_command_t
vierkant::BufferPtr draws_in;

Expand Down Expand Up @@ -271,6 +277,7 @@ class Rasterizer
vierkant::BufferPtr mesh_draw_buffer;
vierkant::BufferPtr mesh_entry_buffer;
vierkant::BufferPtr material_buffer;
vierkant::BufferPtr meshlet_visibility_buffer;

// host visible keep-alive staging-buffer
vierkant::BufferPtr staging_buffer;
Expand Down
12 changes: 6 additions & 6 deletions include/vierkant/descriptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,15 @@ DescriptorSetLayoutPtr create_descriptor_set_layout(const vierkant::DevicePtr &d
const descriptor_map_t &descriptors);

/**
* @brief Create a shared VkDescriptorSet (DescriptorSetPtr) for a provided DescriptorLayout
* @brief Create a shared VkDescriptorSet (DescriptorSetPtr) for a provided set-layout.
*
* @param device handle for the vierkant::Device to create the DescriptorSet
* @param pool handle for a shared VkDescriptorPool to allocate the DescriptorSet from
* @param layout handle for a shared VkDescriptorSetLayout to use as blueprint
* @param device handle for the vierkant::Device to create the DescriptorSet
* @param pool handle for a shared VkDescriptorPool to allocate the DescriptorSet from
* @param set_layout handle for a VkDescriptorSetLayout
* @return the newly created DescriptorSetPtr
*/
DescriptorSetPtr create_descriptor_set(const vierkant::DevicePtr &device, const DescriptorPoolPtr &pool,
const DescriptorSetLayoutPtr &layout, bool variable_count);
VkDescriptorSetLayout set_layout, bool variable_count);

/**
* @brief Update an existing shared VkDescriptorSet with a provided array of vierkant::descriptor_t.
Expand Down Expand Up @@ -146,7 +146,7 @@ DescriptorSetLayoutPtr find_or_create_set_layout(const vierkant::DevicePtr &devi
* @return a retrieved or newly created, shared VkDescriptorSet.
*/
DescriptorSetPtr find_or_create_descriptor_set(const vierkant::DevicePtr &device,
const DescriptorSetLayoutPtr &set_layout,
VkDescriptorSetLayout set_layout,
const descriptor_map_t &descriptors,
const vierkant::DescriptorPoolPtr &pool, descriptor_set_map_t &last,
descriptor_set_map_t &current, bool variable_count,
Expand Down
17 changes: 15 additions & 2 deletions include/vierkant/gpu_culling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ struct gpu_cull_params_t
//! limit number of LoDs (0: no limit)
uint32_t max_num_lods = 0;

bool skip_meshlets = false;

VkQueue queue = VK_NULL_HANDLE;
vierkant::semaphore_submit_info_t semaphore_submit_info = {};

Expand Down Expand Up @@ -77,16 +79,27 @@ struct create_depth_pyramid_params_t
* @brief create_gpu_cull_context is a factory to create an opaque gpu_cull_context_ptr.
*
* @param device a provided vierkant::Device.
* @param size context framebuffer-size
* @param pipeline_cache an optional pipeline_cache.
* @return an opaque pointer, owning a gpu_cull_context.
*/
gpu_cull_context_ptr create_gpu_cull_context(const vierkant::DevicePtr &device,
const glm::vec2 &size,
const vierkant::PipelineCachePtr &pipeline_cache = nullptr);

/**
* @brief create_depth_pyramid can be used to create a 'hierarchical z-buffer (hzb)' or 'depth-pyramid'.
* @brief retrieve internally stored 'hierarchical z-buffer (hzb)' / depth-pyramid.
*
* @param context a provided gpu_cull_context_t
 * @return a vierkant::ImagePtr containing the internally stored depth-pyramid
*/
vierkant::ImagePtr get_depth_pyramid(const vierkant::gpu_cull_context_ptr &context);

/**
 * @brief create_depth_pyramid can be used to create a 'hierarchical z-buffer (hzb)' / depth-pyramid.
*
* @param context a provided vierkant::Device.
* @param context a provided gpu_cull_context_t
* @param params a provided struct with parameters
* @return a vierkant::ImagePtr containing the created depth-pyramid
*/
Expand Down
10 changes: 5 additions & 5 deletions include/vierkant/hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

#pragma once

#include <functional>
#include <cstring>
#include <cstdint>
#include <cstring>
#include <functional>

namespace vierkant
{
Expand Down Expand Up @@ -90,11 +90,11 @@ static inline uint32_t murmur3_32(const K &key, uint32_t seed)

if constexpr(num_hashes)
{
auto ptr = reinterpret_cast<const uint32_t *>(&key);
auto ptr = reinterpret_cast<const uint32_t *>(&key), end = ptr + num_hashes;

for(uint32_t i = num_hashes; i; i--)
for(; ptr < end; ++ptr)
{
h ^= murmur_32_scramble(ptr[i - 1]);
h ^= murmur_32_scramble(*ptr);
h = (h << 13) | (h >> 19);
h = h * 5 + 0xe6546b64;
}
Expand Down
196 changes: 194 additions & 2 deletions include/vierkant/linear_hashmap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,198 @@ class linear_hashmap
clear();
}

//! @return the number of key/value pairs currently stored
[[nodiscard]] inline size_t size() const { return m_num_elements; }

//! @return the number of slots in the internal storage-array
[[nodiscard]] inline size_t capacity() const { return m_capacity; }

//! @return true, if no key/value pairs are stored
[[nodiscard]] inline bool empty() const { return size() == 0; }

//! remove all key/value pairs, keeping the current capacity
inline void clear()
{
    m_num_elements = 0;

    // mark every slot as empty: default-constructed key, disengaged optional
    for(uint64_t i = 0; i < m_capacity; ++i)
    {
        auto &slot = m_storage[i];
        slot.key = key_t();
        slot.value = std::optional<value_t>();
    }
}

/**
 * @brief insert or overwrite a key/value pair.
 *
 * grows/rehashes the map beforehand, if the maximum load-factor would be exceeded.
 * NOTE: a default-constructed key_t marks empty slots — do not use it as a key.
 *
 * @param key    a provided key
 * @param value  a provided value to store
 * @return the probe-length that was required to place the pair
 */
inline uint32_t put(const key_t &key, const value_t &value)
{
    check_load_factor();
    return internal_put(key, value);
}

/**
 * @brief look up the value stored for a provided key.
 *
 * uses linear probing; a default-constructed key marks an empty slot and ends the probe.
 * a matching key with a disengaged value (removed entry / tombstone) is skipped.
 * NOTE(review): probing assumes at least one empty slot exists (load-factor < 1) — confirm
 * max_load_factor is never effectively 1.0 on a full table.
 *
 * @param key a provided key
 * @return an optional containing the stored value, or std::nullopt if not found
 */
[[nodiscard]] std::optional<value_t> get(const key_t &key) const
{
    if(!m_capacity) { return {}; }

    // use 64-bit index-arithmetic to match internal_put, so probe-sequences
    // agree even for capacities beyond 2^32
    for(uint64_t idx = m_hash_fn(key);; idx++)
    {
        idx &= m_capacity - 1;
        auto &item = m_storage[idx];
        if(item.key == key_t()) { return {}; }
        else if(key == item.key)
        {
            if(item.value) { return item.value; }
        }
    }
}

/**
 * @brief remove a key/value pair, if present.
 *
 * the slot keeps its key as a tombstone (value disengaged), so later probes
 * for other keys colliding on this slot still find their entries.
 *
 * @param key a provided key
 */
void remove(const key_t &key)
{
    if(!m_capacity) { return; }

    // use 64-bit index-arithmetic to match internal_put/get probe-sequences
    for(uint64_t idx = m_hash_fn(key);; idx++)
    {
        idx &= m_capacity - 1;
        auto &item = m_storage[idx];
        if(item.key == key_t()) { return; }
        else if(key == item.key && item.value)
        {
            item.value = {};
            m_num_elements--;
            return;
        }
    }
}

//! @return true, if a value is currently stored for the provided key
[[nodiscard]] inline bool contains(const key_t &key) const { return get(key).has_value(); }

/**
 * @brief serialize the complete storage-array into a provided destination.
 *
 * output-layout is a flat array of m_capacity {key, value}-structs.
 * empty slots are written value-initialized; removed entries (tombstones)
 * are written with their key and a default-constructed value.
 *
 * @param dst a destination-pointer; may be nullptr to only query the required size
 * @return the required size of the output-array in bytes
 */
size_t get_storage(void *dst) const
{
    struct output_item_t
    {
        key_t key = {};
        value_t value = {};
    };

    if(dst)
    {
        auto output_ptr = reinterpret_cast<output_item_t *>(dst);
        storage_item_t *item = m_storage.get(), *end = item + m_capacity;
        for(; item != end; ++item, ++output_ptr)
        {
            if(item->key != key_t())
            {
                output_ptr->key = item->key;
                // disengaged optional (removed entry) -> default-constructed value
                output_ptr->value = item->value ? *item->value : value_t();
            }
            else { *output_ptr = {}; }
        }
    }
    return sizeof(output_item_t) * m_capacity;
}

void reserve(size_t new_capacity)
{
auto new_linear_hashmap = linear_hashmap(new_capacity);
storage_item_t *ptr = m_storage.get(), *end = ptr + m_capacity;
for(; ptr != end; ++ptr)
{
if(ptr->key != key_t())
{
if(ptr->value) { new_linear_hashmap.put(ptr->key, *ptr->value); }
}
}
swap(*this, new_linear_hashmap);
}

//! @return the current fill-ratio; 0 for an unallocated map (avoids a 0/0 -> NaN division)
[[nodiscard]] float load_factor() const
{
    return m_capacity ? static_cast<float>(m_num_elements) / static_cast<float>(m_capacity) : 0.f;
}

//! @return the maximum fill-ratio before an insert triggers a rehash
[[nodiscard]] float max_load_factor() const { return m_max_load_factor; }

//! set the maximum fill-ratio (clamped to [0.01, 1]) and rehash immediately, if already exceeded
void max_load_factor(float load_factor)
{
    m_max_load_factor = std::clamp<float>(load_factor, 0.01f, 1.f);
    check_load_factor();
}

//! exchange the complete state of two hashmaps (member-wise swap)
friend void swap(linear_hashmap &lhs, linear_hashmap &rhs)
{
    std::swap(lhs.m_capacity, rhs.m_capacity);
    std::swap(lhs.m_num_elements, rhs.m_num_elements);
    std::swap(lhs.m_storage, rhs.m_storage);
    std::swap(lhs.m_hash_fn, rhs.m_hash_fn);
    std::swap(lhs.m_max_load_factor, rhs.m_max_load_factor);
    std::swap(lhs.m_grow_factor, rhs.m_grow_factor);
}

private:
//! internal slot: a default-constructed key marks an empty slot,
//! a set key with a disengaged optional marks a removed entry (tombstone)
struct storage_item_t
{
    key_t key;
    std::optional<value_t> value;
};

//! grow/rehash if the current element-count reaches the allowed maximum load-factor.
//! grows to at least 32 slots, otherwise by m_grow_factor
//! (presumably rounded up to a power of two by the constructor — probing masks with capacity - 1)
inline void check_load_factor()
{
    if(m_num_elements >= m_capacity * m_max_load_factor)
    {
        reserve(std::max<size_t>(32, static_cast<size_t>(m_grow_factor * m_capacity)));
    }
}

/**
 * @brief insert or overwrite a key/value pair using linear probing.
 *
 * slots occupied by a different live key are skipped; empty slots and
 * tombstones of other keys are claimed. re-inserting a key that was
 * previously removed (slot kept its key with a disengaged value) now
 * increments m_num_elements again, keeping size() consistent after
 * remove()/put() cycles.
 *
 * @param key    a provided key
 * @param value  a provided value to store
 * @return the probe-length that was required to place the pair
 */
inline uint32_t internal_put(const key_t key, const value_t &value)
{
    uint32_t probe_length = 0;

    for(uint64_t idx = m_hash_fn(key);; idx++, probe_length++)
    {
        idx &= m_capacity - 1;
        auto &item = m_storage[idx];

        // load previous key
        key_t probed_key = item.key;

        if(probed_key != key)
        {
            // hit another valid entry, keep probing
            if(probed_key != key_t() && item.value) { continue; }
            item.key = key;
            m_num_elements++;
        }
        else if(!item.value)
        {
            // re-inserting a previously removed key (tombstone): count it again
            m_num_elements++;
        }
        item.value = value;
        return probe_length;
    }
}

// number of slots; expected to be a power of two (probing masks indices with m_capacity - 1)
uint64_t m_capacity = 0;
// number of live key/value pairs (tombstones excluded)
uint64_t m_num_elements = 0;
// flat slot-array, allocated on reserve/rehash
std::unique_ptr<storage_item_t[]> m_storage;
// hash-function, defaults to murmur3_32 with seed 0
hash32_fn m_hash_fn = std::bind(murmur3_32<key_t>, std::placeholders::_1, 0);

// reasonably low load-factor to keep average probe-lengths low
float m_max_load_factor = 0.5f;
float m_grow_factor = 2.f;
};

template<typename K, typename V>
class linear_hashmap_mt
{
public:
using key_t = K;
using value_t = V;
using hash32_fn = std::function<uint32_t(const key_t &)>;
static_assert(std::is_default_constructible_v<key_t>, "key_t not default-constructible");
static_assert(std::equality_comparable<key_t>, "key_t not comparable");

linear_hashmap_mt() = default;
linear_hashmap_mt(const linear_hashmap_mt &) = delete;
linear_hashmap_mt(linear_hashmap_mt &other) : linear_hashmap_mt() { swap(*this, other); };
linear_hashmap_mt &operator=(linear_hashmap_mt other)
{
swap(*this, other);
return *this;
}

explicit linear_hashmap_mt(uint64_t min_capacity)
: m_capacity(crocore::next_pow_2(min_capacity)), m_storage(std::make_unique<storage_item_t[]>(m_capacity))
{
clear();
}

inline size_t size() const { return m_num_elements; }

inline size_t capacity() const { return m_capacity; }
Expand Down Expand Up @@ -133,7 +325,7 @@ class linear_hashmap

void reserve(size_t new_capacity)
{
auto new_linear_hashmap = linear_hashmap(new_capacity);
auto new_linear_hashmap = linear_hashmap_mt(new_capacity);
storage_item_t *ptr = m_storage.get(), *end = ptr + m_capacity;
for(; ptr != end; ++ptr)
{
Expand All @@ -155,7 +347,7 @@ class linear_hashmap
check_load_factor();
}

friend void swap(linear_hashmap &lhs, linear_hashmap &rhs)
friend void swap(linear_hashmap_mt &lhs, linear_hashmap_mt &rhs)
{
std::lock(lhs.m_mutex, rhs.m_mutex);
std::unique_lock lock_lhs(lhs.m_mutex, std::adopt_lock), lock_rhs(rhs.m_mutex, std::adopt_lock);
Expand Down
Loading

0 comments on commit 96fc668

Please sign in to comment.