From b03f063c0571d9974cfc7ef88eab046c50581d21 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Thu, 18 Jan 2024 15:19:34 +0100 Subject: [PATCH] refactor: replaced some Vecs with boxed slices --- benches/lsmt.rs | 5 +++- src/disk_block.rs | 18 ++++++++---- src/segment/index/mod.rs | 3 +- src/segment/index/writer.rs | 57 ++++++++++++++++++------------------- src/segment/reader.rs | 5 ++-- src/segment/writer.rs | 35 ++++++++++++----------- 6 files changed, 66 insertions(+), 57 deletions(-) diff --git a/benches/lsmt.rs b/benches/lsmt.rs index 700db53..e08802e 100644 --- a/benches/lsmt.rs +++ b/benches/lsmt.rs @@ -56,7 +56,10 @@ fn load_block_from_disk(c: &mut Criterion) { } } - let mut block = ValueBlock { items, crc: 0 }; + let mut block = ValueBlock { + items: items.into_boxed_slice(), + crc: 0, + }; let mut file = tempfile::tempfile().unwrap(); let mut bytes = Vec::with_capacity(u16::MAX.into()); diff --git a/src/disk_block.rs b/src/disk_block.rs index 35063ed..e6a8cf5 100644 --- a/src/disk_block.rs +++ b/src/disk_block.rs @@ -8,7 +8,7 @@ use std::io::{Cursor, Read, Write}; /// The integrity of a block can be checked using the CRC value that is saved in it. #[derive(Clone, Debug)] pub struct DiskBlock { - pub items: Vec, + pub items: Box<[T]>, pub crc: u32, } @@ -37,7 +37,7 @@ impl DiskBlock { impl DiskBlock { /// Calculates the CRC from a list of values - pub fn create_crc(items: &Vec) -> crate::Result { + pub fn create_crc(items: &[T]) -> crate::Result { let mut hasher = crc32fast::Hasher::new(); // NOTE: Truncation is okay and actually needed @@ -73,7 +73,7 @@ impl Serializable for DiskBlock { writer.write_all(&(self.items.len() as u32).to_be_bytes())?; // Serialize each value - for value in &self.items { + for value in self.items.iter() { value.serialize(writer)?; } @@ -95,7 +95,10 @@ impl Deserializable for DiskBlock { items.push(T::deserialize(reader)?); } - Ok(Self { items, crc }) + Ok(Self { + items: items.into_boxed_slice(), + crc, + }) } } @@ -113,7 +116,10 @@ mod tests { let items = vec![item1.clone(), item2.clone()]; let crc = DiskBlock::create_crc(&items)?; - let block = DiskBlock { items, crc }; + let block = DiskBlock { + items: items.into_boxed_slice(), + crc, + }; // Serialize to bytes let mut serialized = Vec::new(); @@ -141,7 +147,7 @@ mod tests { let item2 = Value::new(vec![7, 8, 9], vec![10, 11, 12], 43, ValueType::Value); let block = DiskBlock { - items: vec![item1, item2], + items: [item1, item2].into(), crc: 12345, }; diff --git a/src/segment/index/mod.rs b/src/segment/index/mod.rs index a59d458..418d05f 100644 --- a/src/segment/index/mod.rs +++ b/src/segment/index/mod.rs @@ -314,7 +314,8 @@ impl BlockIndex { let mut tree = BTreeMap::new(); - for item in index.items { + // TODO: https://github.com/rust-lang/rust/issues/59878 + for item in index.items.into_vec() { tree.insert( item.start_key, BlockHandleBlockHandle { diff --git a/src/segment/index/writer.rs b/src/segment/index/writer.rs index 022e851..7454b8f 100644 --- a/src/segment/index/writer.rs +++ b/src/segment/index/writer.rs @@ -36,8 +36,8 @@ pub struct Writer { index_writer: BufWriter, block_size: u32, block_counter: u32, - block_chunk: DiskBlock, - index_chunk: DiskBlock, + block_chunk: Vec, + index_chunk: Vec, } impl Writer { @@ -48,16 +48,6 @@ impl Writer { let index_writer = File::create(path.as_ref().join(TOP_LEVEL_INDEX_FILE))?; let index_writer = BufWriter::new(index_writer); - let block_chunk = DiskBlock { - items: vec![], - crc: 0, - }; - - let index_chunk = DiskBlock { - items: vec![], - crc: 0, - }; - Ok(Self { path: path.as_ref().into(), file_pos: 0, @@ -65,18 +55,23 @@ impl Writer { index_writer, block_counter: 0, block_size, - block_chunk, - index_chunk, + block_chunk: Vec::with_capacity(1_000), + index_chunk: Vec::with_capacity(1_000), }) } fn write_block(&mut self) -> crate::Result<()> { + // Prepare block + let mut block = DiskBlock:: { + items: std::mem::replace(&mut self.block_chunk, Vec::with_capacity(1_000)) + .into_boxed_slice(), + crc: 0, + }; + // Serialize block let mut bytes = Vec::with_capacity(u16::MAX.into()); - self.block_chunk.crc = DiskBlock::::create_crc(&self.block_chunk.items)?; - self.block_chunk - .serialize(&mut bytes) - .expect("should serialize block"); + block.crc = DiskBlock::::create_crc(&block.items)?; + block.serialize(&mut bytes).expect("should serialize block"); // Compress using LZ4 let bytes = compress_prepend_size(&bytes); @@ -88,22 +83,17 @@ impl Writer { .write_all(&bytes)?; // Expect is fine, because the chunk is not empty - let first = self - .block_chunk - .items - .first() - .expect("Chunk should not be empty"); + let first = block.items.first().expect("Chunk should not be empty"); let bytes_written = bytes.len(); - self.index_chunk.items.push(BlockHandle { + self.index_chunk.push(BlockHandle { start_key: first.start_key.clone(), offset: self.file_pos, size: bytes_written as u32, }); self.block_counter = 0; - self.block_chunk.items.clear(); self.file_pos += bytes_written as u64; Ok(()) @@ -122,7 +112,7 @@ impl Writer { offset, size, }; - self.block_chunk.items.push(reference); + self.block_chunk.push(reference); self.block_counter += block_handle_size; @@ -146,14 +136,21 @@ impl Writer { log::trace!("Concatted index blocks onto blocks file"); - for item in &mut self.index_chunk.items { + for item in &mut self.index_chunk { item.offset += block_file_size; } + // Prepare block + let mut block = DiskBlock:: { + items: std::mem::replace(&mut self.index_chunk, Vec::with_capacity(1_000)) + .into_boxed_slice(), + crc: 0, + }; + // Serialize block let mut bytes = Vec::with_capacity(u16::MAX.into()); - self.index_chunk.crc = DiskBlock::::create_crc(&self.index_chunk.items)?; - self.index_chunk + block.crc = DiskBlock::::create_crc(&block.items)?; + block .serialize(&mut bytes) .expect("should serialize index block"); @@ -167,7 +164,7 @@ impl Writer { log::trace!( "Written top level index to {}, with {} pointers ({} bytes)", self.path.join(TOP_LEVEL_INDEX_FILE).display(), - self.index_chunk.items.len(), + block.items.len(), bytes.len(), ); diff --git a/src/segment/reader.rs b/src/segment/reader.rs index 1340ce2..b9b90e6 100644 --- a/src/segment/reader.rs +++ b/src/segment/reader.rs @@ -85,7 +85,7 @@ impl Reader { &self.segment_id, key, )? { - let items = block.items.clone().into(); + let items = block.items.clone().to_vec().into(); self.blocks.insert(key.to_vec().into(), items); Some(()) @@ -109,7 +109,8 @@ impl Reader { drop(file_guard); - self.blocks.insert(key.to_vec().into(), block.items.into()); + self.blocks + .insert(key.to_vec().into(), block.items.to_vec().into()); Ok(Some(())) } else { diff --git a/src/segment/writer.rs b/src/segment/writer.rs index 9692d8f..34963a1 100644 --- a/src/segment/writer.rs +++ b/src/segment/writer.rs @@ -135,7 +135,7 @@ pub struct Writer { block_writer: BufWriter, index_writer: IndexWriter, - chunk: ValueBlock, + chunk: Vec, pub block_count: usize, pub item_count: usize, @@ -181,10 +181,7 @@ impl Writer { let index_writer = IndexWriter::new(&opts.path, opts.block_size)?; - let chunk = ValueBlock { - items: Vec::with_capacity(1_000), - crc: 0, - }; + let chunk = Vec::with_capacity(10_000); Ok(Self { opts, @@ -210,7 +207,7 @@ impl Writer { key_count: 0, #[cfg(feature = "bloom")] - bloom_hash_buffer: Vec::with_capacity(1_000), + bloom_hash_buffer: Vec::with_capacity(10_000), }) } @@ -218,23 +215,28 @@ impl Writer { /// /// This is triggered when a `Writer::write` causes the buffer to grow to the configured `block_size` fn write_block(&mut self) -> crate::Result<()> { - debug_assert!(!self.chunk.items.is_empty()); + debug_assert!(!self.chunk.is_empty()); let uncompressed_chunk_size = self .chunk - .items .iter() .map(|item| item.size() as u64) .sum::(); self.uncompressed_size += uncompressed_chunk_size; + // Prepare block + let mut block = ValueBlock { + items: std::mem::replace(&mut self.chunk, Vec::with_capacity(10_000)) + .into_boxed_slice(), + crc: 0, + }; + block.crc = ValueBlock::create_crc(&block.items)?; + // Serialize block let mut bytes = Vec::with_capacity(u16::MAX.into()); - self.chunk.crc = ValueBlock::create_crc(&self.chunk.items)?; - self.chunk - .serialize(&mut bytes) - .expect("should serialize block"); + + block.serialize(&mut bytes).expect("should serialize block"); // Compress using LZ4 let bytes = compress_prepend_size(&bytes); @@ -248,16 +250,15 @@ impl Writer { let bytes_written = bytes.len() as u32; // Expect is fine, because the chunk is not empty - let first = self.chunk.items.first().expect("Chunk should not be empty"); + let first = block.items.first().expect("Chunk should not be empty"); self.index_writer .register_block(first.key.clone(), self.file_pos, bytes_written)?; // Adjust metadata self.file_pos += u64::from(bytes_written); - self.item_count += self.chunk.items.len(); + self.item_count += block.items.len(); self.block_count += 1; - self.chunk.items.clear(); Ok(()) } @@ -288,7 +289,7 @@ impl Writer { let seqno = item.seqno; self.chunk_size += item.size(); - self.chunk.items.push(item); + self.chunk.push(item); if self.chunk_size >= self.opts.block_size as usize { self.write_block()?; @@ -313,7 +314,7 @@ impl Writer { /// Finishes the segment, making sure all data is written durably pub fn finish(&mut self) -> crate::Result<()> { - if !self.chunk.items.is_empty() { + if !self.chunk.is_empty() { self.write_block()?; }