From 04f22135a0f6b4dd9e488dc5610c7ffee6fb9455 Mon Sep 17 00:00:00 2001 From: Jauhar Arifin Date: Sun, 4 Aug 2024 14:20:49 +0700 Subject: [PATCH] add benchmark for insert and fix eviction logic --- Cargo.lock | 427 ++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 6 + benches/db_benchmark.rs | 31 +++ src/db.rs | 2 + src/lib.rs | 1 - src/pager.rs | 27 ++- 6 files changed, 484 insertions(+), 10 deletions(-) create mode 100644 benches/db_benchmark.rs diff --git a/Cargo.lock b/Cargo.lock index 80d05d2..3d3df97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.14" @@ -78,18 +84,82 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + [[package]] name = "byteorder" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + [[package]] name = "colorchoice" version = "1.0.1" @@ -102,19 +172,94 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2707e3afba5e19b75d582d88bc79237418f2a2a2d673d01cf9b03633b46e98f3" +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "dbshark" version = "0.1.0" dependencies = [ "anyhow", "crc64", + "criterion", "env_logger", + "indexmap", "log", "parking_lot", "rand", "tempfile", ] +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "env_filter" version = "0.1.0" @@ -138,6 +283,12 @@ dependencies = [ "log", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.9" @@ -165,18 +316,85 @@ dependencies = [ "wasi", ] +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "indexmap" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "libc" version = "0.2.155" @@ -211,6 +429,27 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "parking_lot" version = "0.12.3" @@ -234,6 +473,34 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "plotters" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" + +[[package]] +name = "plotters-svg" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" +dependencies = [ + "plotters-backend", +] + [[package]] name = "ppv-lite86" version = "0.2.18" @@ -291,6 +558,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.2" @@ -342,12 +629,59 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "serde" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + [[package]] name = "smallvec" version = "1.13.2" @@ -377,6 +711,16 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -389,12 +733,95 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index dd92bc7..0994347 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,10 +6,16 @@ edition = "2021" [dependencies] anyhow = "1.0.86" crc64 = "2.0.0" +indexmap = "2.3.0" log = "0.4.22" parking_lot = "0.12.3" rand = "0.8.5" [dev-dependencies] +criterion = "0.5.1" env_logger = "0.11.3" tempfile = "3.10.1" + +[[bench]] +name = "db_benchmark" +harness = false diff --git a/benches/db_benchmark.rs b/benches/db_benchmark.rs new file mode 100644 index 0000000..ae89f4e --- /dev/null +++ b/benches/db_benchmark.rs @@ -0,0 +1,31 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use dbshark::{Db, Setting}; +use rand::seq::SliceRandom; +use rand::SeedableRng; + +criterion_group!(benches, tx_insert_benchmark); +criterion_main!(benches); + +pub fn tx_insert_benchmark(c: &mut Criterion) { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path(); + + let mut items = Vec::new(); + for i in 0..100000 { + let key = format!("key{i:05}"); + let val = format!("val{i:05}"); + items.push((key, val)); + } + let mut rng = rand::rngs::StdRng::seed_from_u64(0); + + let db = Db::open(path, Setting::default()).unwrap(); + let mut tx = db.update().unwrap(); + let mut bucket = tx.bucket("sample").unwrap(); + + c.bench_function("insert", |b| { + b.iter(|| { + let (key, val) = items.choose(&mut rng).unwrap(); + bucket.put(key.as_bytes(), val.as_bytes()).unwrap(); + }) + }); +} diff --git a/src/db.rs b/src/db.rs index 7504111..bae87e3 100644 --- a/src/db.rs +++ b/src/db.rs @@ -65,6 +65,8 @@ impl Db { let wal_path = path.join("wal"); let double_buff_path = path.join("dbuff"); + // TODO(important): The files here are not locked. In order to prevent corruption, only + // one database instance can exist at a time. We should lock the file for this. let mut db_file = OpenOptions::new() .read(true) .write(true) diff --git a/src/lib.rs b/src/lib.rs index 47a451b..14bc83d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,3 @@ -// TODO: remove allow unused above // TODO: don't use anyhow for the error handling // TODO: setup CI diff --git a/src/pager.rs b/src/pager.rs index f4ccd2d..b842d42 100644 --- a/src/pager.rs +++ b/src/pager.rs @@ -1,6 +1,7 @@ use crate::content::{Bytes, Content}; use crate::wal::{Lsn, LsnExt, TxId, TxState, Wal, WalRecord}; use anyhow::anyhow; +use indexmap::IndexSet; use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::collections::{HashMap, HashSet}; use std::fs::File; @@ -103,7 +104,7 @@ struct PagerFlushInternal { // TODO: maybe add assert to check if it's not possible that a same page // is stored in both flushing_pages and buffer pool. flushing_pages: Box<[u8]>, - flushing_pgids: Vec, + flushing_pgids: IndexSet, } const PAGE_HEADER_SIZE: usize = 32; @@ -197,7 +198,7 @@ impl Pager { }), flush_internal: RwLock::new(PagerFlushInternal { flushing_pages, - flushing_pgids: vec![], + flushing_pgids: IndexSet::default(), }), db_state: RwLock::new(DbState::default()), }) @@ -442,7 +443,7 @@ impl Pager { &self.f, &self.double_buff_f, self.page_size, - pgid, + old_pgid, buffer, )?; drop(flush_internal); @@ -507,7 +508,7 @@ impl Pager { pgid: PageId, buffer: &[u8], ) -> anyhow::Result<()> { - if let Some(i) = internal.flushing_pgids.iter().position(|p| p == &pgid) { + if let Some((i, _)) = internal.flushing_pgids.get_full(&pgid) { internal.flushing_pages[i * page_size..(i + 1) * page_size].copy_from_slice(buffer); } else { let is_full = @@ -517,7 +518,7 @@ impl Pager { } let i = internal.flushing_pgids.len(); - internal.flushing_pgids.push(pgid); + internal.flushing_pgids.insert(pgid); internal.flushing_pages[i * page_size..(i + 1) * page_size].copy_from_slice(buffer); } @@ -623,10 +624,18 @@ impl Pager { meta.id = pgid; { - let mut f = self.f.lock(); - // TODO: try to seek and read using a single syscall - f.seek(SeekFrom::Start(pgid.get() * self.page_size as u64))?; - f.read_exact(buff)?; + let flush_internal = self.flush_internal.read(); + if let Some((i, _)) = flush_internal.flushing_pgids.get_full(&pgid) { + buff.copy_from_slice( + &flush_internal.flushing_pages[i * self.page_size..(i + 1) * self.page_size], + ); + } else { + drop(flush_internal); + let mut f = self.f.lock(); + // TODO: try to seek and read using a single syscall + f.seek(SeekFrom::Start(pgid.get() * self.page_size as u64))?; + f.read_exact(buff)?; + } } let ok = Self::decode_internal(self.page_size, meta, buff)?;