From 298dfc6bf1a5331f3c4d1c361faf779e12c5131f Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Thu, 25 Jul 2024 23:13:22 -0700 Subject: [PATCH 01/16] cleanup: only compressor needs progress explicitly nulled --- flaca/src/image/jpegtran.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/flaca/src/image/jpegtran.rs b/flaca/src/image/jpegtran.rs index 6be7adc..a1db766 100644 --- a/flaca/src/image/jpegtran.rs +++ b/flaca/src/image/jpegtran.rs @@ -23,14 +23,15 @@ use mozjpeg_sys::{ jpeg_common_struct, jpeg_compress_struct, jpeg_copy_critical_parameters, - jpeg_create_compress, jpeg_create_decompress, + jpeg_CreateCompress, jpeg_decompress_struct, jpeg_destroy_compress, jpeg_destroy_decompress, jpeg_error_mgr, jpeg_finish_compress, jpeg_finish_decompress, + JPEG_LIB_VERSION, jpeg_mem_dest, jpeg_mem_src, jpeg_read_coefficients, @@ -255,7 +256,6 @@ impl<'a> From<&'a [u8]> for JpegSrcInfo<'a> { // Set up the error, then the struct. out.cinfo.common.err = std::ptr::addr_of_mut!(*out.err); jpeg_create_decompress(&mut out.cinfo); - out.cinfo.common.progress = std::ptr::null_mut(); } out @@ -296,7 +296,11 @@ impl From<&mut JpegSrcInfo<'_>> for JpegDstInfo { unsafe { // Set up the error, then the struct. out.cinfo.common.err = std::ptr::addr_of_mut!(*out.err); - jpeg_create_compress(&mut out.cinfo); + jpeg_CreateCompress(&mut out.cinfo, JPEG_LIB_VERSION, size_of_val(&out.cinfo)); + + // Note: depending on the compiler/flags, JPEG compression can + // segfault if this isn't explicitly made null. Not sure why it + // isn't an always/never behavior… out.cinfo.common.progress = std::ptr::null_mut(); // Sync the source trace level with the destination. From 16ef35a5211cba88f2a101d16b4c8c7657409df3 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Thu, 25 Jul 2024 23:46:25 -0700 Subject: [PATCH 02/16] cleanup: self.size doesn't factor into freeing --- flaca/src/image/jpegtran.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaca/src/image/jpegtran.rs b/flaca/src/image/jpegtran.rs index a1db766..0438cbe 100644 --- a/flaca/src/image/jpegtran.rs +++ b/flaca/src/image/jpegtran.rs @@ -88,7 +88,7 @@ impl Deref for EncodedJPEG { impl Drop for EncodedJPEG { #[allow(unsafe_code)] fn drop(&mut self) { - if ! self.is_empty() { + if ! self.buf.is_null() { unsafe { libc::free(self.buf.cast::()); } self.buf = std::ptr::null_mut(); } From 548d5102b47b274b694fc4fe6f9577ce449bcbd2 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Thu, 25 Jul 2024 23:47:45 -0700 Subject: [PATCH 03/16] cleanup: match flapfli::ffi naming --- flaca/src/image/jpegtran.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/flaca/src/image/jpegtran.rs b/flaca/src/image/jpegtran.rs index 0438cbe..68f4aff 100644 --- a/flaca/src/image/jpegtran.rs +++ b/flaca/src/image/jpegtran.rs @@ -78,7 +78,7 @@ impl Deref for EncodedJPEG { #[allow(clippy::cast_possible_truncation, unsafe_code)] fn deref(&self) -> &Self::Target { - if self.is_empty() { &[] } + if self.is_null() { &[] } else { unsafe { std::slice::from_raw_parts(self.buf, self.size as usize) } } @@ -90,7 +90,7 @@ impl Drop for EncodedJPEG { fn drop(&mut self) { if ! self.buf.is_null() { unsafe { libc::free(self.buf.cast::()); } - self.buf = std::ptr::null_mut(); + self.buf = std::ptr::null_mut(); // Is this needed? } } } @@ -104,8 +104,14 @@ impl EncodedJPEG { } } - /// # Is Empty? - fn is_empty(&self) -> bool { self.size == 0 || self.buf.is_null() } + /// # Is Null? + /// + /// This is essentially an `is_empty`, returning `true` if the length value + /// is zero or the buffer pointer is literally null. + /// + /// (The name was chosen to help avoid conflicts with dereferenced slice + /// methods.) + fn is_null(&self) -> bool { self.size == 0 || self.buf.is_null() } } @@ -226,7 +232,7 @@ pub(super) fn optimize(src: &[u8]) -> Option { unsafe { jpeg_finish_decompress(&mut srcinfo.cinfo); } // Return it if we got it! - if happy && ! out.is_empty() && out.size < src_size { Some(out) } + if happy && ! out.is_null() && out.size < src_size { Some(out) } else { None } } From a264a748b75fd373dee10fef7e9e9aa04f3d2ab4 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Thu, 25 Jul 2024 23:48:11 -0700 Subject: [PATCH 04/16] misc: use NonNull to reinforce non-nullness --- flaca/src/image/jpegtran.rs | 14 +++++++------- flapfli/src/lodepng.rs | 18 ++++++++---------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/flaca/src/image/jpegtran.rs b/flaca/src/image/jpegtran.rs index 68f4aff..51a7a1f 100644 --- a/flaca/src/image/jpegtran.rs +++ b/flaca/src/image/jpegtran.rs @@ -55,6 +55,7 @@ use std::{ }, marker::PhantomPinned, ops::Deref, + ptr::NonNull, }; @@ -286,7 +287,7 @@ impl<'a> Drop for JpegSrcInfo<'a> { /// but its error is a raw pointer because `mozjpeg` is really weird. Haha. struct JpegDstInfo { cinfo: jpeg_compress_struct, - err: *mut jpeg_error_mgr, + err: NonNull, _pin: PhantomPinned, } @@ -295,13 +296,14 @@ impl From<&mut JpegSrcInfo<'_>> for JpegDstInfo { fn from(src: &mut JpegSrcInfo<'_>) -> Self { let mut out = Self { cinfo: unsafe { std::mem::zeroed() }, - err: Box::into_raw(new_err()), + // Safety: boxes point somewhere! + err: unsafe { NonNull::new_unchecked(Box::into_raw(new_err())) }, _pin: PhantomPinned, }; unsafe { // Set up the error, then the struct. - out.cinfo.common.err = std::ptr::addr_of_mut!(*out.err); + out.cinfo.common.err = out.err.as_ptr(); jpeg_CreateCompress(&mut out.cinfo, JPEG_LIB_VERSION, size_of_val(&out.cinfo)); // Note: depending on the compiler/flags, JPEG compression can @@ -310,7 +312,7 @@ impl From<&mut JpegSrcInfo<'_>> for JpegDstInfo { out.cinfo.common.progress = std::ptr::null_mut(); // Sync the source trace level with the destination. - src.err.trace_level = (*out.err).trace_level; + src.err.trace_level = out.err.as_ref().trace_level; } out @@ -322,9 +324,7 @@ impl Drop for JpegDstInfo { fn drop(&mut self) { unsafe { jpeg_destroy_compress(&mut self.cinfo); - - // The error pointer is no longer accessible. - let _ = Box::from_raw(self.err); + let _ = Box::from_raw(self.err.as_ptr()); } } } diff --git a/flapfli/src/lodepng.rs b/flapfli/src/lodepng.rs index 101766c..729ff2b 100644 --- a/flapfli/src/lodepng.rs +++ b/flapfli/src/lodepng.rs @@ -82,18 +82,15 @@ pub(crate) extern "C" fn lodepng_crc32(buf: *const c_uchar, len: usize) -> c_uin /// the image dimensions. It enables us to hold one thing instead of three /// while also ensuring the memory is freed correctly on drop. pub(super) struct DecodedImage { - pub(super) buf: *mut c_uchar, - pub(super) w: c_uint, - pub(super) h: c_uint, + pub(super) buf: NonNull, + pub(super) w: u32, + pub(super) h: u32, } impl Drop for DecodedImage { #[allow(unsafe_code)] fn drop(&mut self) { - if let Some(nn) = NonNull::new(self.buf) { - unsafe { flapfli_free(nn); } - self.buf = std::ptr::null_mut(); // Is this necessary? - } + unsafe { flapfli_free(self.buf); } } } @@ -156,7 +153,8 @@ impl LodePNGState { }; // Return it if we got it. - if 0 == res && ! buf.is_null() && 0 != w && 0 != h { + if 0 == res && 0 != w && 0 != h { + let buf = NonNull::new(buf)?; Some(DecodedImage { buf, w, h }) } else { None } @@ -173,7 +171,7 @@ impl LodePNGState { // Safety: a non-zero response is an error. let res = unsafe { - lodepng_encode(&mut out.buf, &mut out.size, img.buf, img.w, img.h, self) + lodepng_encode(&mut out.buf, &mut out.size, img.buf.as_ptr(), img.w, img.h, self) }; 0 == res && ! out.is_null() @@ -235,7 +233,7 @@ impl LodePNGState { // Safety: a non-zero response is an error. let mut stats = LodePNGColorStats::default(); if 0 != unsafe { - lodepng_compute_color_stats(&mut stats, img.buf, img.w, img.h, &self.info_raw) + lodepng_compute_color_stats(&mut stats, img.buf.as_ptr(), img.w, img.h, &self.info_raw) } { return None; } // The image is too small for tRNS chunk overhead. From 147728d7b14cfae95e022306e0511114b574b9a7 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Fri, 26 Jul 2024 00:41:55 -0700 Subject: [PATCH 05/16] misc: use NonZeroU32 to reinforce non-zeroness --- flapfli/src/lodepng.rs | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/flapfli/src/lodepng.rs b/flapfli/src/lodepng.rs index 729ff2b..4afff3a 100644 --- a/flapfli/src/lodepng.rs +++ b/flapfli/src/lodepng.rs @@ -28,6 +28,7 @@ use std::{ c_uint, }, mem::MaybeUninit, + num::NonZeroU32, ptr::NonNull, }; use super::{ @@ -82,9 +83,9 @@ pub(crate) extern "C" fn lodepng_crc32(buf: *const c_uchar, len: usize) -> c_uin /// the image dimensions. It enables us to hold one thing instead of three /// while also ensuring the memory is freed correctly on drop. pub(super) struct DecodedImage { - pub(super) buf: NonNull, - pub(super) w: u32, - pub(super) h: u32, + buf: NonNull, + w: NonZeroU32, + h: NonZeroU32, } impl Drop for DecodedImage { @@ -133,11 +134,14 @@ impl Default for LodePNGState { impl Drop for LodePNGState { #[allow(unsafe_code)] - fn drop(&mut self) { unsafe { lodepng_state_cleanup(self) } } + fn drop(&mut self) { + unsafe { lodepng_state_cleanup(self) } + } } impl LodePNGState { #[allow(unsafe_code)] + #[inline] /// # Decode! /// /// This attempts to decode a raw image byte slice, returning the details @@ -148,14 +152,14 @@ impl LodePNGState { let mut h = 0; // Safety: a non-zero response is an error. - let res = unsafe { + if 0 == unsafe { lodepng_decode(&mut buf, &mut w, &mut h, self, src.as_ptr(), src.len()) - }; - - // Return it if we got it. - if 0 == res && 0 != w && 0 != h { - let buf = NonNull::new(buf)?; - Some(DecodedImage { buf, w, h }) + } { + Some(DecodedImage { + buf: NonNull::new(buf)?, + w: NonZeroU32::new(w)?, + h: NonZeroU32::new(h)?, + }) } else { None } } @@ -171,7 +175,7 @@ impl LodePNGState { // Safety: a non-zero response is an error. let res = unsafe { - lodepng_encode(&mut out.buf, &mut out.size, img.buf.as_ptr(), img.w, img.h, self) + lodepng_encode(&mut out.buf, &mut out.size, img.buf.as_ptr(), img.w.get(), img.h.get(), self) }; 0 == res && ! out.is_null() @@ -233,11 +237,11 @@ impl LodePNGState { // Safety: a non-zero response is an error. let mut stats = LodePNGColorStats::default(); if 0 != unsafe { - lodepng_compute_color_stats(&mut stats, img.buf.as_ptr(), img.w, img.h, &self.info_raw) + lodepng_compute_color_stats(&mut stats, img.buf.as_ptr(), img.w.get(), img.h.get(), &self.info_raw) } { return None; } // The image is too small for tRNS chunk overhead. - if img.w * img.h <= 16 && 0 != stats.key { stats.alpha = 1; } + if img.w.checked_mul(img.h)?.get() <= 16 && 0 != stats.key { stats.alpha = 1; } // Set the encoding color mode to RGB/RGBA. self.encoder.auto_convert = 0; From 4b7dd740105de3128dd8a08bf3a2bc81c81853cb Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Fri, 26 Jul 2024 21:08:35 -0700 Subject: [PATCH 06/16] misc: remove parallel build --- flaca/Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flaca/Cargo.toml b/flaca/Cargo.toml index 7951c43..b77d7f3 100644 --- a/flaca/Cargo.toml +++ b/flaca/Cargo.toml @@ -130,7 +130,8 @@ features = [ "progress" ] [dependencies.mozjpeg-sys] version = "=2.2.0" -features = [ "jpegtran" ] +default-features = false +features = [ "jpegtran", "nasm_simd", "unwinding" ] [dependencies.oxipng] version = "=9.1.2" From d60059c0bcfe162ad75700a1b1b8379731e48870 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Fri, 26 Jul 2024 21:25:35 -0700 Subject: [PATCH 07/16] perf: move oxipng::Options to thread_local --- flaca/src/image/mod.rs | 103 +++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 61 deletions(-) diff --git a/flaca/src/image/mod.rs b/flaca/src/image/mod.rs index a9b950f..d7d25a0 100644 --- a/flaca/src/image/mod.rs +++ b/flaca/src/image/mod.rs @@ -9,22 +9,14 @@ pub(super) mod kind; use crate::MAX_RESOLUTION; use kind::ImageKind; -use oxipng::Options as OxipngOptions; use std::{ path::Path, - sync::{ - atomic::Ordering::Relaxed, - OnceLock, - }, + sync::atomic::Ordering::Relaxed, }; use super::EncodingError; -static OXIPNG_OPTIONS: OnceLock = OnceLock::new(); - - - #[allow(clippy::inline_always)] // This is the hottest path we've got! #[inline(always)] /// # Encode Image. @@ -131,7 +123,47 @@ fn encode_mozjpeg(raw: &mut Vec) { /// oxipng -o 3 -s -a -i 0 --fix /// ``` fn encode_oxipng(raw: &mut Vec) { - if let Ok(mut new) = oxipng::optimize_from_memory(raw, OXIPNG_OPTIONS.get_or_init(oxipng_options)) { + use oxipng::{ + Deflaters, + IndexSet, + Interlacing, + Options, + RowFilter, + StripChunks, + }; + + thread_local!( + static OXI: Options = Options { + fix_errors: true, + force: false, + filter: IndexSet::from([ + RowFilter::None, + RowFilter::Average, + RowFilter::BigEnt, + RowFilter::Bigrams, + RowFilter::Brute, + RowFilter::Entropy, + RowFilter::MinSum, + RowFilter::Paeth, + RowFilter::Sub, + RowFilter::Up, + ]), + interlace: Some(Interlacing::None), + optimize_alpha: true, + bit_depth_reduction: true, + color_type_reduction: true, + palette_reduction: true, + grayscale_reduction: true, + idat_recoding: true, + scale_16: false, + strip: StripChunks::All, + deflate: Deflaters::Libdeflater { compression: 12 }, + fast_evaluation: false, + timeout: None, + } + ); + + if let Ok(mut new) = OXI.with(|opts| oxipng::optimize_from_memory(raw, opts)) { if new.len() < raw.len() && ImageKind::is_png(&new) { std::mem::swap(raw, &mut new); } @@ -155,54 +187,3 @@ fn encode_zopflipng(raw: &mut Vec) { } } } - -#[inline(never)] -/// # Generate Oxipng Options. -/// -/// This returns the strongest possible Oxipng compression profile (minus -/// the zopfli bits, which we try in a separate pass). -/// -/// This is basically just "preset 6", with: -/// * Error fixing enabled; -/// * Libdeflater; -/// * All the alpha optimizations; -/// * Interlacing disabled; -/// * All headers stripped; -fn oxipng_options() -> OxipngOptions { - use oxipng::{ - Deflaters, - IndexSet, - Interlacing, - RowFilter, - StripChunks, - }; - - OxipngOptions { - fix_errors: true, - force: false, - filter: IndexSet::from([ - RowFilter::None, - RowFilter::Average, - RowFilter::BigEnt, - RowFilter::Bigrams, - RowFilter::Brute, - RowFilter::Entropy, - RowFilter::MinSum, - RowFilter::Paeth, - RowFilter::Sub, - RowFilter::Up, - ]), - interlace: Some(Interlacing::None), - optimize_alpha: true, - bit_depth_reduction: true, - color_type_reduction: true, - palette_reduction: true, - grayscale_reduction: true, - idat_recoding: true, - scale_16: false, - strip: StripChunks::All, - deflate: Deflaters::Libdeflater { compression: 12 }, - fast_evaluation: false, - timeout: None, - } -} From 374a7ae738a682c40805cd184aafb70bc7ff2a65 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Fri, 26 Jul 2024 22:46:10 -0700 Subject: [PATCH 08/16] unit: verify undropped pointer types have no Drop impl --- flaca/src/image/jpegtran.rs | 2 +- flapfli/src/ffi.rs | 19 ++++++++++++++++++- flapfli/src/zopflipng/kat.rs | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/flaca/src/image/jpegtran.rs b/flaca/src/image/jpegtran.rs index 51a7a1f..ca2eb0f 100644 --- a/flaca/src/image/jpegtran.rs +++ b/flaca/src/image/jpegtran.rs @@ -91,7 +91,7 @@ impl Drop for EncodedJPEG { fn drop(&mut self) { if ! self.buf.is_null() { unsafe { libc::free(self.buf.cast::()); } - self.buf = std::ptr::null_mut(); // Is this needed? + self.buf = std::ptr::null_mut(); // Probably unnecessary? } } } diff --git a/flapfli/src/ffi.rs b/flapfli/src/ffi.rs index 00eafae..64db04d 100644 --- a/flapfli/src/ffi.rs +++ b/flapfli/src/ffi.rs @@ -60,7 +60,7 @@ impl Drop for EncodedPNG { // method, but only if it actually got allocated. if let Some(nn) = NonNull::new(self.buf) { unsafe { flapfli_free(nn); } - self.buf = std::ptr::null_mut(); // Is this necessary? + self.buf = std::ptr::null_mut(); // Probably unnecessary? } } } @@ -210,3 +210,20 @@ const unsafe fn size_and_ptr(ptr: NonNull) -> (NonNull, NonZeroUsize) { (size_and_data_ptr, size) } + + + +#[cfg(test)] +mod tests { + #[test] + /// # No Drop Checks. + /// + /// Prove we aren't missing out by not running drop-in-place or whatever on + /// usize/byte slices. + fn t_nodrop() { + use std::mem::needs_drop; + + assert!(! needs_drop::<[usize]>()); + assert!(! needs_drop::<[u8]>()); + } +} diff --git a/flapfli/src/zopflipng/kat.rs b/flapfli/src/zopflipng/kat.rs index 4a89808..8367d4a 100644 --- a/flapfli/src/zopflipng/kat.rs +++ b/flapfli/src/zopflipng/kat.rs @@ -974,6 +974,22 @@ fn tree_symbols(ll_lengths: &ArrayLL, d_lengths: &ArrayD mod tests { use super::*; + #[test] + /// # No Drop Checks. + /// + /// `KatScratch` manually allocates several data structures, and manually + /// deallocates them on drop. It does not, however, perform any + /// drop-in-place-type actions on the pointers because it doesn't need to. + /// + /// At least, it doesn't so long as this test works. + fn t_nodrop() { + use std::mem::needs_drop; + + assert!(! needs_drop::<[Leaf<'_>; ZOPFLI_NUM_LL]>()); + assert!(! needs_drop::<[List; 15]>()); + assert!(! needs_drop::<[Node; KatScratch::MAX]>()); + } + #[test] /// # Test Maxbits. /// From b58f50efa2b791be2e74df94283cfb06ecb0c276 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Sat, 27 Jul 2024 13:10:41 -0700 Subject: [PATCH 09/16] cleanup: use alloc_zeroed to moot most of the manual zeroing we were doing before --- flapfli/src/zopflipng/cache.rs | 60 +--------------------------- flapfli/src/zopflipng/hash.rs | 71 +++++++--------------------------- flapfli/src/zopflipng/kat.rs | 2 +- flapfli/src/zopflipng/mod.rs | 1 - 4 files changed, 17 insertions(+), 117 deletions(-) diff --git a/flapfli/src/zopflipng/cache.rs b/flapfli/src/zopflipng/cache.rs index 019551b..8a97691 100644 --- a/flapfli/src/zopflipng/cache.rs +++ b/flapfli/src/zopflipng/cache.rs @@ -5,13 +5,7 @@ This module contains the Longest Match cache along with several smaller caching structures that aren't big enough to warrant their own dedicated modules. */ -use std::{ - cell::Cell, - ptr::{ - addr_of_mut, - NonNull, - }, -}; +use std::cell::Cell; use super::{ LitLen, SUBLEN_LEN, @@ -21,7 +15,6 @@ use super::{ ZopfliChunk, ZopfliError, ZopfliRange, - ZopfliStateInit, }; @@ -75,23 +68,6 @@ pub(crate) struct MatchCache { sublen: [u8; SUBLEN_CACHED_LEN * ZOPFLI_MASTER_BLOCK_SIZE], } -impl ZopfliStateInit for MatchCache { - #[allow(unsafe_code)] - #[inline] - /// # State Initialization. - /// - /// See `ZopfliState` for more details. - unsafe fn state_init(nn: NonNull) { - let ptr = nn.as_ptr(); - - // The proper defaults for both members are _mostly_ zeroes, so let's - // roll with that since it's cheap and easy. (The values will be reset - // properly before each use anyway.) - addr_of_mut!((*ptr).ld).write_bytes(0, 1); - addr_of_mut!((*ptr).sublen).write_bytes(0, 1); - } -} - impl MatchCache { /// # Initialize. /// @@ -274,18 +250,6 @@ pub(crate) struct SplitCache { set: [u8; SPLIT_CACHE_LEN], } -impl ZopfliStateInit for SplitCache { - #[allow(unsafe_code)] - #[inline] - /// # State Initialization. - /// - /// See `ZopfliState` for more details. - unsafe fn state_init(nn: NonNull) { - // False is zeroes all the way down. - addr_of_mut!((*nn.as_ptr()).set).write_bytes(0, 1); - } -} - impl SplitCache { /// # Initialize. /// @@ -339,27 +303,7 @@ impl SplitCache { pub(crate) struct SqueezeCache { costs: [(f32, LitLen); ZOPFLI_MASTER_BLOCK_SIZE + 1], paths: [LitLen; ZOPFLI_MASTER_BLOCK_SIZE], - costs_len: Cell, -} - -impl ZopfliStateInit for SqueezeCache { - #[allow(unsafe_code)] - #[inline] - /// # State Initialization. - /// - /// See `ZopfliState` for more details. - unsafe fn state_init(nn: NonNull) { - let ptr = nn.as_ptr(); - - // The arrays can be zero-filled to start with; they'll be reset - // or overwritten before each use anyway. - addr_of_mut!((*ptr).costs).write_bytes(0, 1); - addr_of_mut!((*ptr).paths).write_bytes(0, 1); - - // Zero works equally well for the initial length, especially since - // that happens to be true! - addr_of_mut!((*ptr).costs_len).write(Cell::new(0)); - } + pub(super) costs_len: Cell, } impl SqueezeCache { diff --git a/flapfli/src/zopflipng/hash.rs b/flapfli/src/zopflipng/hash.rs index d0f751e..409834c 100644 --- a/flapfli/src/zopflipng/hash.rs +++ b/flapfli/src/zopflipng/hash.rs @@ -7,13 +7,12 @@ collective structure bundling all the hash/cache shit together. use std::{ alloc::{ - alloc, + alloc_zeroed, handle_alloc_error, Layout, }, cell::Cell, ptr::{ - addr_of, addr_of_mut, NonNull, }, @@ -84,27 +83,30 @@ impl ZopfliState { /// them off the stack, it is necessary to initialize everything from raw /// pointers and box them up. /// - /// This unfortunately requires a lot of upfront unsafe code during + /// This unfortunately requires a some upfront unsafe code during /// construction, but everything can be accessed normally thereafter. /// /// To cut down on some of the complexity, the manual layout allocation and /// boxing is done once, here, instead of separately for each individual /// member. - /// - /// See `ZopfliStateInit` below for a few more details. pub(crate) fn new() -> Box { // Reserve the space. const LAYOUT: Layout = Layout::new::(); - let out: NonNull = NonNull::new(unsafe { alloc(LAYOUT).cast() }) + let out: NonNull = NonNull::new(unsafe { alloc_zeroed(LAYOUT).cast() }) .unwrap_or_else(|| handle_alloc_error(LAYOUT)); let ptr = out.as_ptr(); unsafe { - // Initialize the members. - MatchCache::state_init(NonNull::new_unchecked(addr_of_mut!((*ptr).lmc))); - ZopfliHash::state_init(NonNull::new_unchecked(addr_of_mut!((*ptr).hash))); - SplitCache::state_init(NonNull::new_unchecked(addr_of_mut!((*ptr).split))); - SqueezeCache::state_init(NonNull::new_unchecked(addr_of_mut!((*ptr).squeeze))); + // Safety: zeroes are "valid" for all of the primitives — including + // LitLen, which is sized/aligned to u16 — so alloc_zeroed has + // taken care of everything but the Cell in SqueezeCache, which we + // can sort out thusly: + addr_of_mut!((*ptr).squeeze.costs_len).write(Cell::new(0)); + + // Note: zero is not the appropriate _logical_ default in most + // cases, but since this struct is designed for reuse, it manually + // resets everything when starting a new cycle anyway. At this + // stage, validity is sufficient. // Done! Box::from_raw(ptr) @@ -335,24 +337,6 @@ impl ZopfliState { -/// # State Init. -/// -/// The `ZopfliState` struct is initialized from a raw pointer to prevent -/// stack allocations. This trait exposes — in as limited a way as possible — -/// raw initialization methods for its members. (`ZopfliState::new` is the only -/// place that calls these methods.) -/// -/// The `state_init` invocations do not necessarily populate _default_ values -/// since they'll be re(reset) prior to use anyway, but the values will at -/// least be valid for their types, preventing accidental UB. -pub(crate) trait ZopfliStateInit { - #[allow(unsafe_code)] - /// # State Initialization. - unsafe fn state_init(nn: NonNull); -} - - - #[derive(Clone, Copy)] /// # Zopfli Hash. /// @@ -366,33 +350,6 @@ struct ZopfliHash { same: [u16; ZOPFLI_WINDOW_SIZE], } -impl ZopfliStateInit for ZopfliHash { - #[allow(unsafe_code)] - #[inline] - /// # State Initialization. - /// - /// See `ZopfliState` for more details. - unsafe fn state_init(nn: NonNull) { - let ptr = nn.as_ptr(); - - // All the hash/index arrays default to `-1_i16` for `None`, which - // we can do efficiently by flipping all bits on. - addr_of_mut!((*ptr).chain1.hash_idx).write_bytes(u8::MAX, 1); - addr_of_mut!((*ptr).chain1.idx_hash).write_bytes(u8::MAX, 1); - addr_of_mut!((*ptr).chain1.idx_prev).write_bytes(u8::MAX, 1); - - // The initial hash value is just plain zero. - addr_of_mut!((*ptr).chain1.val).write(0); - - // The second chain is the same as the first, so we can simply copy - // it wholesale. - addr_of_mut!((*ptr).chain2).copy_from_nonoverlapping(addr_of!((*ptr).chain1), 1); - - // The repetition counts default to zero. - addr_of_mut!((*ptr).same).write_bytes(0, 1); - } -} - impl ZopfliHash { /// # Reset/Warm Up. /// @@ -1075,7 +1032,7 @@ struct ZopfliHashChain { impl ZopfliHashChain { /// # Reset. /// - /// Set everything to its default values so we can begin again. + /// (Re)Set all the data to its logical defaults so we can begin again. fn reset(&mut self) { self.hash_idx.fill(-1); self.idx_hash.fill(-1); diff --git a/flapfli/src/zopflipng/kat.rs b/flapfli/src/zopflipng/kat.rs index 8367d4a..3a6c5fd 100644 --- a/flapfli/src/zopflipng/kat.rs +++ b/flapfli/src/zopflipng/kat.rs @@ -956,7 +956,7 @@ fn tree_symbols(ll_lengths: &ArrayLL, d_lengths: &ArrayD let ptr = nn.as_ptr(); // Safety: writing 0..ll_len then ll_len..ll_len + d_len covers the - // full allocation. + // full allocation; everything will be initialized afterwards. std::ptr::copy_nonoverlapping(ll_lengths.as_ptr(), ptr, ll_len); std::ptr::copy_nonoverlapping(d_lengths.as_ptr(), ptr.add(ll_len), d_len); diff --git a/flapfli/src/zopflipng/mod.rs b/flapfli/src/zopflipng/mod.rs index c5b1e9e..4b5334a 100644 --- a/flapfli/src/zopflipng/mod.rs +++ b/flapfli/src/zopflipng/mod.rs @@ -38,7 +38,6 @@ use error::{ zopfli_error, ZopfliError, }; -use hash::ZopfliStateInit; pub(crate) use hash::ZopfliState; use iter::ReducingSlices; use kat::{ From 67549765bb22ac119651fabf13960abab96b9af1 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Sat, 27 Jul 2024 19:25:01 -0700 Subject: [PATCH 10/16] cleanup: simplify hlit/hdist trimming --- flapfli/src/zopflipng/kat.rs | 39 +++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/flapfli/src/zopflipng/kat.rs b/flapfli/src/zopflipng/kat.rs index 3a6c5fd..1c5ca03 100644 --- a/flapfli/src/zopflipng/kat.rs +++ b/flapfli/src/zopflipng/kat.rs @@ -905,26 +905,29 @@ const fn tree_hclen(cl_counts: &[u32; 19]) -> DeflateSymBasic { #[allow(unsafe_code, clippy::cast_possible_truncation)] /// # Tree Symbols. /// -/// Drop the last two bytes from each symbol set, then up to 29 leading zeroes, -/// merge them together (lengths then distances), and return the details. -/// -/// DEFLATE is _evil_! Haha. +/// Drop the last two bytes from each symbol set along with up to 29 +/// trailing zeroes, then merge them together (lengths then distances), and +/// return the details. fn tree_symbols(ll_lengths: &ArrayLL, d_lengths: &ArrayD) -> Result<(Box<[DeflateSym]>, TreeDist, TreeDist), ZopfliError> { // Trim non-zero symbol lengths from ll_lengths[..286], keeping the leading // litlen literals regardless of value. // literals are always kept.) - let mut hlit: usize = 29; - while hlit != 0 && ll_lengths[256 + hlit].is_zero() { hlit -= 1; } - // Safety: TreeDist covers 0..30. - let hlit = unsafe { std::mem::transmute::(hlit as u8) }; - - // Trim non-zero symbol lengths from d_lengths[..30], keeping the first - // entry regardless of value. - let mut hdist: usize = 29; - while hdist != 0 && d_lengths[hdist].is_zero() { hdist -= 1; } - // Safety: TreeDist covers 0..30. - let hdist = unsafe { std::mem::transmute::(hdist as u8) }; + let hlit = ll_lengths[256..286].iter() + .rposition(|&b| ! b.is_zero()) + .map_or(TreeDist::T00, |v| { + // Safety: the slice has length 30, and TreeDist covers 0..=29. + unsafe { std::mem::transmute::(v as u8) } + }); + + // Now do the same for the distances, albeit without the literal/symbolic + // distinction. + let hdist = d_lengths[..30].iter() + .rposition(|&b| ! b.is_zero()) + .map_or(TreeDist::T00, |v| { + // Safety: the slice has length 30, and TreeDist covers 0..=29. + unsafe { std::mem::transmute::(v as u8) } + }); // The combined length. let ll_len = 257 + hlit as usize; @@ -945,8 +948,8 @@ fn tree_symbols(ll_lengths: &ArrayLL, d_lengths: &ArrayD ) }; - // Safety: the checked version of NonNull::new ensures the allocation - // worked. + // Safety: the allocation might fail, though, so we should use the checked + // NonNull before trying to use it! let nn: NonNull = NonNull::new(unsafe { alloc(layout) }) .ok_or(zopfli_error!())? .cast(); @@ -981,7 +984,7 @@ mod tests { /// deallocates them on drop. It does not, however, perform any /// drop-in-place-type actions on the pointers because it doesn't need to. /// - /// At least, it doesn't so long as this test works. + /// At least, it _shouldn't_ need to. Let's verify that! fn t_nodrop() { use std::mem::needs_drop; From a0164f4fbe6a890790a2ae903fe950a4a8980878 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Sat, 3 Aug 2024 00:33:31 -0700 Subject: [PATCH 11/16] bump: mozjpeg-sys 2.2.1 --- flaca/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaca/Cargo.toml b/flaca/Cargo.toml index b77d7f3..10e3bf0 100644 --- a/flaca/Cargo.toml +++ b/flaca/Cargo.toml @@ -129,7 +129,7 @@ version = "0.13.*" features = [ "progress" ] [dependencies.mozjpeg-sys] -version = "=2.2.0" +version = "=2.2.1" default-features = false features = [ "jpegtran", "nasm_simd", "unwinding" ] From 711c8b95ba4d7e2568a74a67a7f27e1e06fd74a0 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Thu, 8 Aug 2024 23:32:13 -0700 Subject: [PATCH 12/16] bump: argyle 0.8 --- flaca/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaca/Cargo.toml b/flaca/Cargo.toml index 10e3bf0..af6322c 100644 --- a/flaca/Cargo.toml +++ b/flaca/Cargo.toml @@ -112,7 +112,7 @@ items = [ ] [dependencies] -argyle = "0.7.*" +argyle = "0.8.*" crossbeam-channel = "=0.5.*" ctrlc = "=3.4.4" dactyl = "0.7.*" From 88ce8bbd469cd6517648ba7e2bef211de1c055fd Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Tue, 13 Aug 2024 11:27:32 -0700 Subject: [PATCH 13/16] bump: ctrlc 3.4.5 --- flaca/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaca/Cargo.toml b/flaca/Cargo.toml index af6322c..28b140a 100644 --- a/flaca/Cargo.toml +++ b/flaca/Cargo.toml @@ -114,7 +114,7 @@ items = [ [dependencies] argyle = "0.8.*" crossbeam-channel = "=0.5.*" -ctrlc = "=3.4.4" +ctrlc = "=3.4.5" dactyl = "0.7.*" dowser = "0.9.*" libc = "0.2.*" From 6fb5b85cffef166bfed0eadc5985680dfde61e29 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Tue, 13 Aug 2024 11:45:30 -0700 Subject: [PATCH 14/16] bump: 3.1.6 --- flaca/Cargo.toml | 2 +- flapfli/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flaca/Cargo.toml b/flaca/Cargo.toml index 28b140a..eba6ad1 100644 --- a/flaca/Cargo.toml +++ b/flaca/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flaca" -version = "3.1.5" +version = "3.1.6" license = "WTFPL" authors = ["Josh Stoik "] edition = "2021" diff --git a/flapfli/Cargo.toml b/flapfli/Cargo.toml index c332747..fd459e3 100644 --- a/flapfli/Cargo.toml +++ b/flapfli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flapfli" -version = "3.1.5" +version = "3.1.6" license = "WTFPL" authors = ["Josh Stoik "] edition = "2021" From 051eaf6a7351f73001acd1c5d9c11a41d4ff0b22 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Tue, 13 Aug 2024 11:45:44 -0700 Subject: [PATCH 15/16] misc: GCC is better today. Haha. --- justfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/justfile b/justfile index 9766b99..b402449 100644 --- a/justfile +++ b/justfile @@ -28,11 +28,11 @@ doc_dir := justfile_directory() + "/doc" release_dir := justfile_directory() + "/release" skel_dir := justfile_directory() + "/skel" -export RUSTFLAGS := "-Ctarget-cpu=x86-64-v3 -Cllvm-args=--cost-kind=throughput -Clinker-plugin-lto -Clink-arg=-fuse-ld=lld" -export CC := "clang" -export CXX := "clang++" -export CFLAGS := "-Wall -Wextra -flto -march=x86-64-v3" -export CXXFLAGS := "-Wall -Wextra -flto -march=x86-64-v3" +#export RUSTFLAGS := "-Ctarget-cpu=x86-64-v3 -Cllvm-args=--cost-kind=throughput -Clinker-plugin-lto -Clink-arg=-fuse-ld=lld" +#export CC := "clang" +#export CXX := "clang++" +#export CFLAGS := "-Wall -Wextra -flto -march=x86-64-v3" +#export CXXFLAGS := "-Wall -Wextra -flto -march=x86-64-v3" From 29504c9249c01654fc676782c27be68bb5f25a28 Mon Sep 17 00:00:00 2001 From: Josh Stoik Date: Tue, 13 Aug 2024 11:45:55 -0700 Subject: [PATCH 16/16] build: 3.1.6 --- CREDITS.md | 25 +++++++++++++------------ release/man/flaca.1 | 4 ++-- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/CREDITS.md b/CREDITS.md index 9e02c78..67517c5 100644 --- a/CREDITS.md +++ b/CREDITS.md @@ -1,40 +1,41 @@ # Project Dependencies Package: flaca - Version: 3.1.5 - Generated: 2024-07-26 04:46:44 UTC + Version: 3.1.6 + Generated: 2024-08-13 18:34:14 UTC | Package | Version | Author(s) | License | | ---- | ---- | ---- | ---- | | [ahash](https://github.com/tkaitchuck/ahash) | 0.8.11 | [Tom Kaitchuck](mailto:tom.kaitchuck@gmail.com) | Apache-2.0 or MIT | -| [argyle](https://github.com/Blobfolio/argyle) | 0.7.2 | [Blobfolio, LLC.](mailto:hello@blobfolio.com) | WTFPL | +| [argyle](https://github.com/Blobfolio/argyle) | 0.8.0 | [Blobfolio, LLC.](mailto:hello@blobfolio.com) | WTFPL | | [bitvec](https://github.com/bitvecto-rs/bitvec) | 1.0.1 | | MIT | | [bytecount](https://github.com/llogiq/bytecount) | 0.6.8 | [Andre Bogus](mailto:bogusandre@gmail.de) and [Joshua Landau](mailto:joshua@landau.ws) | Apache-2.0 or MIT | -| [bytemuck](https://github.com/Lokathor/bytemuck) | 1.16.1 | [Lokathor](mailto:zefria@gmail.com) | Apache-2.0, MIT, or Zlib | +| [bytemuck](https://github.com/Lokathor/bytemuck) | 1.16.3 | [Lokathor](mailto:zefria@gmail.com) | Apache-2.0, MIT, or Zlib | | [cfg-if](https://github.com/alexcrichton/cfg-if) | 1.0.0 | [Alex Crichton](mailto:alex@alexcrichton.com) | Apache-2.0 or MIT | | [crc32fast](https://github.com/srijs/rust-crc32fast) | 1.4.2 | [Sam Rijs](mailto:srijs@airpost.net) and [Alex Crichton](mailto:alex@alexcrichton.com) | Apache-2.0 or MIT | | [crossbeam-channel](https://github.com/crossbeam-rs/crossbeam) | 0.5.13 | | Apache-2.0 or MIT | | [crossbeam-utils](https://github.com/crossbeam-rs/crossbeam) | 0.8.20 | | Apache-2.0 or MIT | -| [ctrlc](https://github.com/Detegr/rust-ctrlc.git) | 3.4.4 | [Antti Keränen](mailto:detegr@gmail.com) | Apache-2.0 or MIT | +| [ctrlc](https://github.com/Detegr/rust-ctrlc.git) | 3.4.5 | [Antti Keränen](mailto:detegr@gmail.com) | Apache-2.0 or MIT | | [dactyl](https://github.com/Blobfolio/dactyl) | 0.7.2 | [Blobfolio, LLC.](mailto:hello@blobfolio.com) | WTFPL | | [dowser](https://github.com/Blobfolio/dowser) | 0.9.2 | [Blobfolio, LLC.](mailto:hello@blobfolio.com) | WTFPL | | [equivalent](https://github.com/cuviper/equivalent) | 1.0.1 | | Apache-2.0 or MIT | | [fastrand](https://github.com/smol-rs/fastrand) | 2.1.0 | [Stjepan Glavina](mailto:stjepang@gmail.com) | Apache-2.0 or MIT | -| flapfli | 3.1.5 | [Josh Stoik](mailto:josh@blobfolio.com) | WTFPL | +| flapfli | 3.1.6 | [Josh Stoik](mailto:josh@blobfolio.com) | WTFPL | | [funty](https://github.com/myrrlyn/funty) | 2.0.0 | [myrrlyn](mailto:self@myrrlyn.dev) | MIT | | [fyi_msg](https://github.com/Blobfolio/fyi) | 0.13.6 | [Blobfolio, LLC.](mailto:hello@blobfolio.com) | WTFPL | | [hashbrown](https://github.com/rust-lang/hashbrown) | 0.14.5 | [Amanieu d'Antras](mailto:amanieu@gmail.com) | Apache-2.0 or MIT | -| [indexmap](https://github.com/indexmap-rs/indexmap) | 2.2.6 | | Apache-2.0 or MIT | +| [indexmap](https://github.com/indexmap-rs/indexmap) | 2.3.0 | | Apache-2.0 or MIT | | [libc](https://github.com/rust-lang/libc) | 0.2.155 | The Rust Project Developers | Apache-2.0 or MIT | -| [libdeflate-sys](https://github.com/adamkewley/libdeflater) | 1.20.0 | [Adam Kewley](mailto:contact@adamkewley.com) | Apache-2.0 | -| [libdeflater](https://github.com/adamkewley/libdeflater) | 1.20.0 | [Adam Kewley](mailto:contact@adamkewley.com) | Apache-2.0 | +| [libdeflate-sys](https://github.com/adamkewley/libdeflater) | 1.21.0 | [Adam Kewley](mailto:contact@adamkewley.com) | Apache-2.0 | +| [libdeflater](https://github.com/adamkewley/libdeflater) | 1.21.0 | [Adam Kewley](mailto:contact@adamkewley.com) | Apache-2.0 | | [log](https://github.com/rust-lang/log) | 0.4.22 | The Rust Project Developers | Apache-2.0 or MIT | -| [mozjpeg-sys](https://github.com/kornelski/mozjpeg-sys.git) | 2.2.0 | [Kornel](mailto:kornel@geekhood.net) | IJG AND Zlib AND BSD-3-Clause | +| [mozjpeg-sys](https://github.com/kornelski/mozjpeg-sys.git) | 2.2.1 | [Kornel](mailto:kornel@geekhood.net) | IJG AND Zlib AND BSD-3-Clause | +| [once_cell](https://github.com/matklad/once_cell) | 1.19.0 | [Aleksey Kladov](mailto:aleksey.kladov@gmail.com) | Apache-2.0 or MIT | | [oxipng](https://github.com/shssoichiro/oxipng) | 9.1.2 | [Joshua Holmer](mailto:jholmer.in@gmail.com) | MIT | | [radium](https://github.com/bitvecto-rs/radium) | 0.7.0 | [Nika Layzell](mailto:nika@thelayzells.com) and [myrrlyn](mailto:self@myrrlyn.dev) | MIT | -| [rgb](https://github.com/kornelski/rust-rgb) | 0.8.45 | [Kornel Lesiński](mailto:kornel@geekhood.net) | MIT | +| [rgb](https://github.com/kornelski/rust-rgb) | 0.8.48 | [Kornel Lesiński](mailto:kornel@geekhood.net) and [James Forster](mailto:james.forsterer@gmail.com) | MIT | | [rustc-hash](https://github.com/rust-lang-nursery/rustc-hash) | 1.1.0 | The Rust Project Developers | Apache-2.0 or MIT | | [tap](https://github.com/myrrlyn/tap) | 1.0.1 | [Elliott Linder](mailto:elliott.darfink@gmail.com) and [myrrlyn](mailto:self@myrrlyn.dev) | MIT | -| [tempfile](https://github.com/Stebalien/tempfile) | 3.10.1 | [Steven Allen](mailto:steven@stebalien.com), The Rust Project Developers, [Ashley Mannix](mailto:ashleymannix@live.com.au), and [Jason White](mailto:me@jasonwhite.io) | Apache-2.0 or MIT | +| [tempfile](https://github.com/Stebalien/tempfile) | 3.12.0 | [Steven Allen](mailto:steven@stebalien.com), The Rust Project Developers, [Ashley Mannix](mailto:ashleymannix@live.com.au), and [Jason White](mailto:me@jasonwhite.io) | Apache-2.0 or MIT | | [terminal_size](https://github.com/eminence/terminal-size) | 0.3.0 | [Andrew Chin](mailto:achin@eminence32.net) | Apache-2.0 or MIT | | [unicode-width](https://github.com/unicode-rs/unicode-width) | 0.1.13 | [kwantam](mailto:kwantam@gmail.com) and [Manish Goregaokar](mailto:manishsmail@gmail.com) | Apache-2.0 or MIT | | [write_atomic](https://github.com/Blobfolio/write_atomic) | 0.5.0 | [Blobfolio, LLC.](mailto:hello@blobfolio.com) | WTFPL | diff --git a/release/man/flaca.1 b/release/man/flaca.1 index 0a9f58a..6bd846a 100644 --- a/release/man/flaca.1 +++ b/release/man/flaca.1 @@ -1,6 +1,6 @@ -.TH "FLACA" "1" "July 2024" "Flaca v3.1.5" "User Commands" +.TH "FLACA" "1" "August 2024" "Flaca v3.1.6" "User Commands" .SH NAME -Flaca \- Manual page for flaca v3.1.5. +Flaca \- Manual page for flaca v3.1.6. .SH DESCRIPTION Brute\-force, lossless JPEG and PNG compression. .SS USAGE: