# Formula#make_deduplication_links_in (Library/Homebrew/formula.rb)
#
# Replace duplicate files with links to reduce disk space.
#
# Regular files below `dir` are grouped by basename. The first file seen with a
# given content is kept; every later file with the same basename and identical
# content (as reported by `compare_file`) is removed and replaced with a link
# to the kept file.
#
# This is particularly useful for Java dependents that commonly duplicate JARs,
# and for PostgreSQL dependents that install the same SQL files to support
# multiple `postgresql@X` formulae.
#
# FIXME: Hardlinks are not fully supported so using `hardlink: true` will only
#        reduce the bottle size or source build but will be duplicated on bottle pour.
#
# ### Example
#
# ```ruby
# make_deduplication_links_in libexec, extension: "jar"
# ```
#
# @param dir [Pathname] directory to scan recursively; must be a real
#   (non-symlink) directory located inside the formula's `prefix`
# @param extension [String, nil] when given, only files matching
#   `**/*.<extension>` are considered; otherwise every file is considered
# @param allow_noop [Boolean] when `false`, raise if no link was created
# @param hardlink [Boolean] create hardlinks instead of symlinks
# @raise [ArgumentError] if `dir` is not a directory, is a symlink, or is
#   outside of `prefix`
# @raise [RuntimeError] if no links were created and `allow_noop` is `false`
sig { params(dir: Pathname, extension: T.nilable(String), allow_noop: T::Boolean, hardlink: T::Boolean).void }
def make_deduplication_links_in(dir, extension: nil, allow_noop: false, hardlink: false)
  raise ArgumentError, "#{dir} is not a valid directory!" if !dir.directory? || dir.symlink?

  real_dir = dir.realpath
  real_prefix = prefix.realpath
  # Compare whole path components rather than raw string prefixes, so that a
  # sibling such as `.../1.0_1` is not mistaken for being inside `.../1.0`.
  # `ascend` yields `real_dir` itself first, so `dir == prefix` is still accepted.
  raise ArgumentError, "#{dir} must be within #{prefix}!" unless real_dir.ascend.include?(real_prefix)

  # Use Pathname.new to avoid caching information during build
  odebug "Pre-deduplication disk usage of #{dir}: #{disk_usage_readable(Pathname.new(dir).disk_usage)}"

  pattern = "**/*"
  pattern += ".#{extension}" if extension

  # basename => files with distinct contents that are kept as link targets.
  # Tracking every distinct file (not just the first one seen) lets later
  # duplicates be linked even when the first file with that name differs.
  kept_files = Hash.new { |hash, name| hash[name] = [] }

  nlinks = real_dir.glob(pattern).count do |path|
    next false if !path.file? || path.symlink?

    candidates = kept_files[path.basename.to_s]
    base_file = candidates.find { |candidate| compare_file(candidate, path) }
    if base_file.nil?
      # First occurrence of this content: keep it as a target for later files.
      candidates << path
      next false
    end

    rm(path)
    if hardlink
      path.make_link base_file
    else
      path.parent.install_symlink base_file
    end
    true
  end

  odebug "Post-deduplication disk usage of #{dir}: #{disk_usage_readable(Pathname.new(dir).disk_usage)}"
  odebug "#{nlinks} #{Utils.pluralize("#{hardlink ? "hard" : "sym"}link", nlinks)} created"
  raise "No links were created!" if !allow_noop && nlinks.zero?
end