Skip to content

Commit

Permalink
read/macho: support Go's debug section compression (#697)
Browse files Browse the repository at this point in the history
When compression is enabled, debug sections in Mach-O files produced by the
Go compiler have a __zdebug_ section name prefix, and the section data has
the same format as GNU .zdebug_ compression for ELF.

Support these section names in `Object::section_by_name`, and support
the compressed section data in `ObjectSection::compressed_data`.

This commit extracts the GNU-style section compression logic from
read::elf::section into a new module underneath read, and then also uses
it in read::macho.
  • Loading branch information
ajwerner authored Jun 25, 2024
1 parent 7b58f78 commit f54ea55
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 78 deletions.
48 changes: 11 additions & 37 deletions src/read/elf/section.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ use crate::elf;
use crate::endian::{self, Endianness, U32Bytes};
use crate::pod::{self, Pod};
use crate::read::{
self, CompressedData, CompressedFileRange, CompressionFormat, Error, ObjectSection, ReadError,
ReadRef, RelocationMap, SectionFlags, SectionIndex, SectionKind, StringTable,
self, gnu_compression, CompressedData, CompressedFileRange, CompressionFormat, Error,
ObjectSection, ReadError, ReadRef, RelocationMap, SectionFlags, SectionIndex, SectionKind,
StringTable,
};

use super::{
Expand Down Expand Up @@ -508,46 +509,19 @@ impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ElfSection<'data, 'file,
}
}

/// Try GNU-style "ZLIB" header decompression.
// Try GNU-style "ZLIB" header decompression.
fn maybe_compressed_gnu(&self) -> read::Result<Option<CompressedFileRange>> {
let name = match self.name() {
Ok(name) => name,
// I think it's ok to ignore this error?
Err(_) => return Ok(None),
};
if !name.starts_with(".zdebug_") {
if !self
.name()
.map_or(false, |name| name.starts_with(".zdebug_"))
{
return Ok(None);
}
let (section_offset, section_size) = self
.section
.file_range(self.file.endian)
.file_range()
.read_error("Invalid ELF GNU compressed section type")?;
let mut offset = section_offset;
let data = self.file.data;
// Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally
// huge allocations. This also reduces the chance of accidentally matching on a
// .debug_str that happens to start with "ZLIB".
if data
.read_bytes(&mut offset, 8)
.read_error("ELF GNU compressed section is too short")?
!= b"ZLIB\0\0\0\0"
{
return Err(Error("Invalid ELF GNU compressed section header"));
}
let uncompressed_size = data
.read::<U32Bytes<_>>(&mut offset)
.read_error("ELF GNU compressed section is too short")?
.get(endian::BigEndian)
.into();
let compressed_size = section_size
.checked_sub(offset - section_offset)
.read_error("ELF GNU compressed section is too short")?;
Ok(Some(CompressedFileRange {
format: CompressionFormat::Zlib,
offset,
compressed_size,
uncompressed_size,
}))
gnu_compression::compressed_file_range(self.file.data, section_offset, section_size)
.map(Some)
}
}

Expand Down
36 changes: 36 additions & 0 deletions src/read/gnu_compression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use crate::read::{self, Error, ReadError as _};
use crate::{endian, CompressedFileRange, CompressionFormat, ReadRef, U32Bytes};

// Parse the GNU-style inline compression header found at the start of a section
// and describe the compressed payload that follows it as a `CompressedFileRange`.
//
// The header is the 8 bytes "ZLIB\0\0\0\0" followed by a 4 byte big-endian
// uncompressed size. This layout is used by the Go compiler in Mach-O files
// as well as by the GNU linker in some ELF files.
//
// `section_offset` and `section_size` give the file range of the whole section,
// header included. An error is returned if the header cannot be read, if it does
// not match the expected magic, or if the section is smaller than the header.
pub(super) fn compressed_file_range<'data, R: ReadRef<'data>>(
    file_data: R,
    section_offset: u64,
    section_size: u64,
) -> read::Result<CompressedFileRange> {
    const TOO_SHORT: &str = "GNU compressed section is too short";

    // `cursor` is advanced past each header field as it is read.
    let mut cursor = section_offset;
    let magic = file_data.read_bytes(&mut cursor, 8).read_error(TOO_SHORT)?;
    if magic != b"ZLIB\0\0\0\0" {
        return Err(Error("Invalid GNU compressed section header"));
    }

    // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally
    // huge allocations. This also reduces the chance of accidentally matching on a
    // .debug_str that happens to start with "ZLIB".
    let uncompressed_size = u64::from(
        file_data
            .read::<U32Bytes<_>>(&mut cursor)
            .read_error(TOO_SHORT)?
            .get(endian::BigEndian),
    );

    // Whatever remains of the section after the header is the compressed stream.
    let header_len = cursor - section_offset;
    let compressed_size = section_size
        .checked_sub(header_len)
        .read_error(TOO_SHORT)?;

    Ok(CompressedFileRange {
        format: CompressionFormat::Zlib,
        offset: cursor,
        compressed_size,
        uncompressed_size,
    })
}
50 changes: 25 additions & 25 deletions src/read/macho/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,32 +287,32 @@ where
&'file self,
section_name: &[u8],
) -> Option<MachOSection<'data, 'file, Mach, R>> {
// Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg
// ".debug_info" to "__debug_info", and limit to 16 bytes total.
let system_name = if section_name.starts_with(b".") {
if section_name.len() > 15 {
Some(&section_name[1..15])
} else {
Some(&section_name[1..])
}
} else {
None
};
let cmp_section_name = |section: &MachOSection<'data, 'file, Mach, R>| {
section
.name_bytes()
.map(|name| {
section_name == name
|| system_name
.filter(|system_name| {
name.starts_with(b"__") && name[2..] == **system_name
})
.is_some()
})
.unwrap_or(false)
// Translate the section_name by stripping the query_prefix to construct
// a function that matches names starting with name_prefix, taking into
// consideration the maximum section name length.
let make_prefix_matcher = |query_prefix: &'static [u8], name_prefix: &'static [u8]| {
const MAX_SECTION_NAME_LEN: usize = 16;
let suffix = section_name.strip_prefix(query_prefix).map(|suffix| {
let max_len = MAX_SECTION_NAME_LEN - name_prefix.len();
&suffix[..suffix.len().min(max_len)]
});
move |name: &[u8]| suffix.is_some() && name.strip_prefix(name_prefix) == suffix
};

self.sections().find(cmp_section_name)
// Matches "__text" when searching for ".text" and "__debug_str_offs"
// when searching for ".debug_str_offsets", as is common in
// macOS/Mach-O.
let matches_underscores_prefix = make_prefix_matcher(b".", b"__");
// Matches "__zdebug_info" when searching for ".debug_info" and
// "__zdebug_str_off" when searching for ".debug_str_offsets", as is
// used by Go when using GNU-style compression.
let matches_zdebug_prefix = make_prefix_matcher(b".debug_", b"__zdebug_");
self.sections().find(|section| {
section.name_bytes().map_or(false, |name| {
name == section_name
|| matches_underscores_prefix(name)
|| matches_zdebug_prefix(name)
})
})
}

fn section_by_index(&self, index: SectionIndex) -> Result<MachOSection<'data, '_, Mach, R>> {
Expand Down
31 changes: 24 additions & 7 deletions src/read/macho/section.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use crate::endian::{self, Endianness};
use crate::macho;
use crate::pod::Pod;
use crate::read::{
self, CompressedData, CompressedFileRange, ObjectSection, ReadError, ReadRef, RelocationMap,
Result, SectionFlags, SectionIndex, SectionKind,
self, gnu_compression, CompressedData, CompressedFileRange, ObjectSection, ReadError, ReadRef,
RelocationMap, Result, SectionFlags, SectionIndex, SectionKind,
};

use super::{MachHeader, MachOFile, MachORelocationIterator};
Expand Down Expand Up @@ -102,6 +102,21 @@ where
.data(self.file.endian, self.internal.data)
.read_error("Invalid Mach-O section size or offset")
}

// Try GNU-style "ZLIB" header decompression.
//
// Returns `Ok(None)` when the section name cannot be read or does not start
// with "__zdebug_". Otherwise the section is expected to begin with the GNU
// inline compression header, and any failure to parse it is an error.
fn maybe_compressed_gnu(&self) -> Result<Option<CompressedFileRange>> {
    if !self
        .name()
        .map_or(false, |name| name.starts_with("__zdebug_"))
    {
        return Ok(None);
    }
    // The header is read from the file, so a section for which `file_range()`
    // yields nothing cannot be a valid compressed section.
    let (section_offset, section_size) = self
        .file_range()
        .read_error("Invalid Mach-O GNU compressed section type")?;
    gnu_compression::compressed_file_range(self.internal.data, section_offset, section_size)
        .map(Some)
}
}

impl<'data, 'file, Mach, R> read::private::Sealed for MachOSection<'data, 'file, Mach, R>
Expand Down Expand Up @@ -162,14 +177,16 @@ where
))
}

#[inline]
fn compressed_file_range(&self) -> Result<CompressedFileRange> {
Ok(CompressedFileRange::none(self.file_range()))
Ok(if let Some(data) = self.maybe_compressed_gnu()? {
data
} else {
CompressedFileRange::none(self.file_range())
})
}

#[inline]
fn compressed_data(&self) -> Result<CompressedData<'data>> {
self.data().map(CompressedData::none)
fn compressed_data(&self) -> read::Result<CompressedData<'data>> {
self.compressed_file_range()?.data(self.file.data)
}

#[inline]
Expand Down
3 changes: 3 additions & 0 deletions src/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ pub use read_cache::*;
mod util;
pub use util::*;

#[cfg(any(feature = "elf", feature = "macho"))]
mod gnu_compression;

#[cfg(any(
feature = "coff",
feature = "elf",
Expand Down
18 changes: 10 additions & 8 deletions src/read/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,18 @@ pub trait Object<'data>: read::private::Sealed {

/// Get the section named `section_name`, if such a section exists.
///
/// If `section_name` starts with a '.' then it is treated as a system section name,
/// and is compared using the conventions specific to the object file format. This
/// includes:
/// - if ".debug_str_offsets" is requested for a Mach-O object file, then the actual
/// section name that is searched for is "__debug_str_offs".
/// If `section_name` starts with a '.' then it is treated as a system
/// section name, and is compared using the conventions specific to the
/// object file format. This includes:
/// - if ".debug_str_offsets" is requested for a Mach-O object file, then
/// the actual section name that is searched for is "__debug_str_offs".
/// - if ".debug_info" is requested for an ELF object file, then
/// ".zdebug_info" may be returned (and similarly for other debug sections).
/// ".zdebug_info" may be returned (and similarly for other debug
/// sections). Similarly, if ".debug_info" is requested for a Mach-O
/// object file, then "__zdebug_info" may be returned.
///
/// For some object files, multiple segments may contain sections with the same
/// name. In this case, the first matching section will be used.
/// For some object files, multiple segments may contain sections with the
/// same name. In this case, the first matching section will be used.
///
/// This method skips over sections with invalid names.
fn section_by_name(&self, section_name: &str) -> Option<Self::Section<'_>> {
Expand Down
2 changes: 1 addition & 1 deletion testfiles
49 changes: 49 additions & 0 deletions tests/read/macho.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#[cfg(feature = "std")]
use object::{Object, ObjectSection as _};

// Mach-O binaries built by the Go compiler store their debug sections with
// GNU-style compression; check that such sections are found by their
// uncompressed names and reported as compressed.
#[cfg(feature = "std")]
#[test]
fn test_go_macho() {
    // Each entry is: the name passed to `section_by_name`, whether the section
    // is expected to be compressed, and the name actually stored in the file.
    const EXPECTED: &[(&str, bool, &str)] = &[
        (".debug_abbrev", true, "__zdebug_abbrev"),
        (".debug_gdb_scripts", false, "__debug_gdb_scri"),
        (".debug_ranges", true, "__zdebug_ranges"),
        ("__data", false, "__data"),
    ];

    let testfile_dir = std::path::Path::new("testfiles/macho");
    for binary in &["go-aarch64", "go-x86_64"] {
        let handle = std::fs::File::open(testfile_dir.join(binary)).unwrap();
        let cache = object::read::ReadCache::new(handle);
        let parsed = object::read::File::parse(&cache).unwrap();

        for &(query, is_compressed, stored_name) in EXPECTED {
            let section = parsed.section_by_name(query).unwrap();
            assert_eq!(section.name(), Ok(stored_name));

            let range = section.compressed_file_range().unwrap();
            let section_size = section.size();

            if !is_compressed {
                assert_eq!(range.format, object::CompressionFormat::None);
                assert_eq!(range.compressed_size, section_size);
                continue;
            }

            assert_eq!(range.format, object::CompressionFormat::Zlib);
            // The 12 byte difference is the inline header: 8 bytes of magic
            // plus a 4 byte uncompressed size.
            assert_eq!(range.compressed_size, section_size - 12);
            assert!(
                range.uncompressed_size > range.compressed_size,
                "decompressed size is greater than compressed size"
            );
        }
    }
}
1 change: 1 addition & 0 deletions tests/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@

mod coff;
mod elf;
mod macho;

0 comments on commit f54ea55

Please sign in to comment.