diff --git a/.github/workflows/CI-libcramjam.yml b/.github/workflows/CI-libcramjam.yml deleted file mode 100644 index b1ab1da6..00000000 --- a/.github/workflows/CI-libcramjam.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: CI-libcramjam - -on: - push: - branches: - - master - pull_request: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number || github.sha }} - cancel-in-progress: true - -jobs: - build-test: - name: capi=${{ matrix.capi }}-codec=${{ matrix.codec || 'all' }} - runs-on: ubuntu-latest - strategy: - fail-fast: ${{ !( startsWith(github.ref, 'refs/heads/master') || startsWith(github.ref, 'refs/tags/') ) }} - matrix: - capi: - - true - - false - codec: - - zstd - - blosc2 - - gzip - - brotli - - lz4 - - xz - - deflate - - bzip2 - - null # Use all codecs - - # TODO: codecs not implemented in capi feature - exclude: - - capi: true - codec: blosc2 - - capi: true - codec: xz - - capi: true - codec: deflate - steps: - - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable - - - uses: Swatinem/rust-cache@v2 - name: Rust Cache - - - name: Audit - if: | - !matrix.codec && matrix.capi - run: cargo install cargo-audit && cargo audit - - - name: Test capi and single codec - if: matrix.capi && matrix.codec - run: cargo test -p libcramjam --no-default-features --features ${{ matrix.codec }} --features capi --lib - - - name: Test capi and all codecs - if: matrix.capi && !matrix.codec - run: cargo test -p libcramjam --features capi --lib - - - name: Test no capi and all codecs - if: | - !matrix.capi && !matrix.codec - run: cargo test -p libcramjam --lib - - - name: Test no capi and single codec - if: | - !matrix.capi && matrix.codec - run: cargo test -p libcramjam --lib --no-default-features --features ${{ matrix.codec }} diff --git a/.github/workflows/CI-python.yml b/.github/workflows/CI.yml similarity index 77% rename from .github/workflows/CI-python.yml rename to 
.github/workflows/CI.yml index fa2dcb50..8357c31e 100644 --- a/.github/workflows/CI-python.yml +++ b/.github/workflows/CI.yml @@ -1,4 +1,4 @@ -name: CI-python +name: CI on: push: @@ -18,9 +18,9 @@ concurrency: jobs: build-test: runs-on: ${{ matrix.conf.os }} - name: ${{ matrix.conf.os }}-${{ matrix.conf.target }}-${{ matrix.package.name }}-${{ matrix.python-version }}-${{ matrix.conf.python-architecture }}-${{ matrix.conf.manylinux }} + name: ${{ matrix.conf.os }}-${{ matrix.conf.target }}-${{ matrix.python-version }}-${{ matrix.conf.python-architecture }}-${{ matrix.conf.manylinux }} strategy: - fail-fast: ${{ !( startsWith(github.ref, 'refs/heads/master') || !startsWith(github.ref, 'refs/tags/') ) }} + fail-fast: ${{ !( startsWith(github.ref, 'refs/heads/master') || startsWith(github.ref, 'refs/tags/') ) }} matrix: python-version: - '3.8' @@ -28,11 +28,6 @@ jobs: - '3.10' - '3.11' - '3.12' - package: - - dir: cramjam-python - name: cramjam - - dir: cramjam-cli - name: cramjam-cli conf: - { os: ubuntu-latest, target: x86_64, target-triple: x86_64-unknown-linux-gnu, manylinux: auto } - { os: ubuntu-latest, target: x86_64, target-triple: x86_64-unknown-linux-musl, manylinux: musllinux_1_1 } @@ -58,86 +53,62 @@ jobs: # Windows x86_64 pypy - conf: { os: windows-latest, target: x86_64, target-triple: x86_64-pc-windows-msvc } python-version: pypy3.9 - package: { name: cramjam, dir: cramjam-python } - conf: { os: windows-latest, target: x86_64, target-triple: x86_64-pc-windows-msvc } python-version: pypy3.9 - package: { name: cramjam-cli, dir: cramjam-cli } - conf: { os: windows-latest, target: x86_64, target-triple: x86_64-pc-windows-msvc } python-version: pypy3.10 - package: { name: cramjam, dir: cramjam-python } - conf: { os: windows-latest, target: x86_64, target-triple: x86_64-pc-windows-msvc } python-version: pypy3.10 - package: { name: cramjam-cli, dir: cramjam-cli } # Linux x86_64 pypy - conf: { os: ubuntu-latest, target: x86_64, target-triple: 
x86_64-unknown-linux-gnu, manylinux: auto } python-version: pypy3.9 - package: { name: cramjam, dir: cramjam-python } - conf: { os: ubuntu-latest, target: x86_64, target-triple: x86_64-unknown-linux-gnu, manylinux: auto } python-version: pypy3.9 - package: { name: cramjam-cli, dir: cramjam-cli } - conf: { os: ubuntu-latest, target: x86_64, target-triple: x86_64-unknown-linux-gnu, manylinux: auto } python-version: pypy3.10 - package: { name: cramjam, dir: cramjam-python } - conf: { os: ubuntu-latest, target: x86_64, target-triple: x86_64-unknown-linux-gnu, manylinux: auto } python-version: pypy3.10 - package: { name: cramjam-cli, dir: cramjam-cli } # Linux arm pypy - conf: { os: ubuntu-latest, target: aarch64, target-triple: aarch64-unknown-linux-gnu, manylinux: auto } python-version: pypy3.9 - package: { name: cramjam, dir: cramjam-python } - conf: { os: ubuntu-latest, target: aarch64, target-triple: aarch64-unknown-linux-gnu, manylinux: auto } python-version: pypy3.9 - package: { name: cramjam-cli, dir: cramjam-cli } - conf: { os: ubuntu-latest, target: aarch64, target-triple: aarch64-unknown-linux-gnu, manylinux: auto } python-version: pypy3.10 - package: { name: cramjam, dir: cramjam-python } - conf: { os: ubuntu-latest, target: aarch64, target-triple: aarch64-unknown-linux-gnu, manylinux: auto } python-version: pypy3.10 - package: { name: cramjam-cli, dir: cramjam-cli } # OSX x86_64 pypy - conf: { os: macos-13, target: x86_64, target-triple: x86_64-apple-darwin } python-version: pypy3.9 - package: { name: cramjam, dir: cramjam-python } - conf: { os: macos-13, target: x86_64, target-triple: x86_64-apple-darwin } python-version: pypy3.9 - package: { name: cramjam-cli, dir: cramjam-cli } - conf: { os: macos-13, target: x86_64, target-triple: x86_64-apple-darwin } python-version: pypy3.10 - package: { name: cramjam, dir: cramjam-python } - conf: { os: macos-13, target: x86_64, target-triple: x86_64-apple-darwin } python-version: pypy3.10 - package: { name: 
cramjam-cli, dir: cramjam-cli } # OSX universal2 pypy - conf: { os: macos-13, target: universal2, target-triple: x86_64-apple-darwin } python-version: pypy3.9 - package: { name: cramjam, dir: cramjam-python } - conf: { os: macos-13, target: universal2, target-triple: x86_64-apple-darwin } python-version: pypy3.9 - package: { name: cramjam-cli, dir: cramjam-cli } - conf: { os: macos-13, target: universal2, target-triple: x86_64-apple-darwin } python-version: pypy3.10 - package: { name: cramjam, dir: cramjam-python } - conf: { os: macos-13, target: universal2, target-triple: x86_64-apple-darwin } python-version: pypy3.10 - package: { name: cramjam-cli, dir: cramjam-cli } # OSX arm pypy - conf: { os: macos-13, target: aarch64, target-triple: aarch64-apple-darwin } python-version: pypy3.9 - package: { name: cramjam, dir: cramjam-python } - conf: { os: macos-13, target: aarch64, target-triple: aarch64-apple-darwin } python-version: pypy3.9 - package: { name: cramjam-cli, dir: cramjam-cli } - conf: { os: macos-13, target: aarch64, target-triple: aarch64-apple-darwin } python-version: pypy3.10 - package: { name: cramjam, dir: cramjam-python } - conf: { os: macos-13, target: aarch64, target-triple: aarch64-apple-darwin } python-version: pypy3.10 - package: { name: cramjam-cli, dir: cramjam-cli } steps: - uses: actions/checkout@v4 @@ -199,14 +170,14 @@ jobs: # so we'll just use 'cross' to build and pass it in for the action for all archs # nothing special for the revision pin, just deterministic install cargo install cross --git https://github.com/cross-rs/cross --rev 6d097fb - cross build --release --target $TARGET_TRIPLE --package blosc2-sys --target-dir build --manifest-path ${{ matrix.package.dir }}/Cargo.toml + cross build --release --target $TARGET_TRIPLE --package blosc2-sys --target-dir build blosc2_sys_dir=$(ls build/$TARGET_TRIPLE/release/build/ | grep blosc2-sys) mv $WORKSPACE/build/$TARGET_TRIPLE/release/build/$blosc2_sys_dir/out $BLOSC2_INSTALL_PREFIX tree -L 2 
$BLOSC2_INSTALL_PREFIX - name: Rust Tests if: matrix.conf.target == 'x86_64' && !startsWith(matrix.python-version, 'pypy') && matrix.python-version == '3.12' - run: cargo test -p ${{ matrix.package.dir }} + run: cargo test - name: Build wheel (Linux) if: runner.os == 'Linux' @@ -215,7 +186,7 @@ jobs: target: ${{ matrix.conf.target }} manylinux: ${{ matrix.conf.manylinux }} docker-options: -e BLOSC2_INSTALL_PREFIX=${{ github.workspace }}/blosc2 -e LD_LIBRARY_PATH=${{ github.workspace }}/blosc2/lib:${{ github.workspace }}/blosc2/lib64 - args: -i ${{ matrix.python-version }} --release --out dist --manifest-path ${{ matrix.package.dir }}/Cargo.toml --features use-system-blosc2 + args: -i ${{ matrix.python-version }} --release --out dist --features use-system-blosc2 before-script-linux: ls -l $BLOSC2_INSTALL_PREFIX - name: Build wheel (Windows) @@ -231,8 +202,7 @@ jobs: --target ${{ matrix.conf.target-triple }} \ --no-default-features \ --features extension-module \ - --features blosc2-static \ - --manifest-path ${{ matrix.package.dir }}/Cargo.toml + --features blosc2-static name=$(ls -1 wheels/ | head -n 1) delvewheel repair -v wheels/$name -w dist @@ -243,7 +213,7 @@ jobs: with: target: ${{ matrix.conf.target-triple }} # OSX complains with __cpu_model from blosc2 statically compiled so only enabled shared - args: -i python --release --out dist --manifest-path ${{ matrix.package.dir }}/Cargo.toml --no-default-features --features extension-module --features blosc2-shared + args: -i python --release --out dist --no-default-features --features extension-module --features blosc2-shared - name: Fix wheel (MacOS) if: runner.os == 'macOS' @@ -263,32 +233,22 @@ jobs: !( matrix.python-version == 'pypy3.10' && runner.os == 'Windows' ) && ( matrix.conf.target == 'x86_64' || matrix.conf.target == 'universal2' ) run: | - # TODO: Remove after next release of cramjam-cli - python -m pip install pytest-benchmark cramjam - # Second install guarantees it's going to install from local 
dir w/ --no-index # use first to get in dev dependencies - python -m pip install ${{ matrix.package.name }}[dev] --pre --find-links dist --force-reinstall - python -m pip install ${{ matrix.package.name }} --pre --no-index --find-links dist --force-reinstall + python -m pip install cramjam[dev] --pre --find-links dist --force-reinstall + python -m pip install cramjam --pre --no-index --find-links dist --force-reinstall - python -m pytest ${{ matrix.package.dir }} -vs --benchmark-skip + python -m pytest -vs --benchmark-skip # Could use 'distro: alpine_latest' in 'run-on-arch-action' but seems difficult to install a specific version of python # so we'll just use existing python alpine images to test import and cli use w/o testing archs other than x86_64 - name: Install built wheel and Test (musllinux) if: startsWith(matrix.conf.manylinux, 'musl') && matrix.conf.target == 'x86_64' run: | - if [ "${{ matrix.package.name }}" = "cramjam" ]; then - docker run \ - -v $(pwd)/dist:/wheels \ - --rm python:${{ matrix.python-version }}-alpine sh \ - -c "pip install ${{ matrix.package.name }} --no-index --find-links /wheels && python -c 'import cramjam'" - else - docker run \ - -v $(pwd)/dist:/wheels \ - --rm python:${{ matrix.python-version }}-alpine sh \ - -c "pip install ${{ matrix.package.name }} --no-index --find-links /wheels && cramjam-cli --help" - fi + docker run \ + -v $(pwd)/dist:/wheels \ + --rm python:${{ matrix.python-version }}-alpine sh \ + -c "pip install cramjam --no-index --find-links /wheels && python -c 'import cramjam'" - name: Install built wheel and Test (Cross) if: | @@ -315,9 +275,9 @@ jobs: PYTHON=python${{ matrix.python-version }} $PYTHON -m venv venv venv/bin/pip install -U pip - venv/bin/pip install ${{ matrix.package.name }} --pre --no-index --find-links /artifacts --force-reinstall + venv/bin/pip install cramjam --pre --no-index --find-links /artifacts --force-reinstall - venv/bin/python -c 'import cramjam' || venv/bin/cramjam-cli --help + venv/bin/python -c
'import cramjam' - name: Upload wheels uses: actions/upload-artifact@v3 @@ -336,10 +296,8 @@ jobs: python-version: 3.12 - name: Build sdist cramjam run: | - pip install build - python -m build --sdist cramjam-python/ -o ./dist - - name: Build sdist cramjam-cli - run: python -m build --sdist cramjam-cli/ -o ./dist + python -m pip install build + python -m build --sdist -o ./dist - name: Upload sdists uses: actions/upload-artifact@v3 with: @@ -347,44 +305,26 @@ jobs: path: dist pypi-publish: - name: Upload ${{ matrix.package }} release to PyPI + name: Upload release to PyPI if: startsWith(github.ref, 'refs/tags/') strategy: fail-fast: false - matrix: - package: - - cramjam - - cramjam-cli runs-on: ubuntu-latest needs: [build-test, build-sdist] environment: name: pypi - url: https://pypi.org/p/${{ matrix.package }} + url: https://pypi.org/p/cramjam permissions: id-token: write steps: - uses: actions/download-artifact@v3 with: name: dist - - name: Separate packages into directories - run: | - ls -l - - mkdir artifacts-cramjam-cli - mv cramjam_cli* artifacts-cramjam-cli/ - ls -l artifacts-cramjam-cli/ - - mkdir artifacts-cramjam - mv cramjam* artifacts-cramjam/ - ls -l artifacts-cramjam/ - - ls -l - - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: skip-existing: true - packages-dir: artifacts-${{ matrix.package }}/ + packages-dir: ./ gh-publish: name: Publish artifacts to GH diff --git a/Cargo.toml b/Cargo.toml index c82c1ddf..59beca85 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,14 +1,33 @@ -[workspace] -members = [ - "libcramjam", - "cramjam-cli", - "cramjam-python" -] -resolver = "2" - -[workspace.package] +[package] +name = "cramjam-python" +version = "2.8.4-rc1" +authors = ["Miles Granger "] edition = "2021" -homepage = "https://github.com/milesgranger/cramjam" +license = "MIT" +description = "Thin Python bindings to de/compression algorithms in Rust" +readme = "README.md" + +# See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "cramjam" +crate-type = ["cdylib"] + +[features] +default = ["extension-module"] +extension-module = ["pyo3/extension-module"] +generate-import-lib = ["pyo3/generate-import-lib"] # needed for Windows PyPy builds + +blosc2-static = ["libcramjam/blosc2-static"] +blosc2-shared = ["libcramjam/blosc2-shared"] +use-system-blosc2 = ["libcramjam/use-system-blosc2"] + + +[dependencies] +pyo3 = { version = "^0.20", default-features = false, features = ["macros"] } +libcramjam = { version = "^0.4" } + +[build-dependencies] +pyo3-build-config = "^0.20" [profile.release] strip = true diff --git a/cramjam-python/Makefile b/Makefile similarity index 100% rename from cramjam-python/Makefile rename to Makefile diff --git a/README.md b/README.md index 0f5e4310..4740f4b2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,101 @@ -# cramjam +# cramjam-python -A collection of compression algorithms, all in one place thru a common (as possible) API. +[![Code Style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) +[![CI](https://github.com/milesgranger/cramjam/workflows/CI/badge.svg?branch=master)](https://github.com/milesgranger/cramjam/actions?query=branch=master) +[![PyPI](https://img.shields.io/pypi/v/cramjam.svg)](https://pypi.org/project/cramjam) +[![Anaconda-Server Badge](https://anaconda.org/conda-forge/cramjam/badges/version.svg)](https://anaconda.org/conda-forge/cramjam) +[![Downloads](https://pepy.tech/badge/cramjam/month)](https://pepy.tech/project/cramjam) -- [Rust](./libcramjam) -- [Python](./cramjam-python) -- [CLI](./cramjam-cli) +[API Documentation](https://docs.rs/cramjam) + +### Install +```commandline +pip install --upgrade cramjam # Requires no Python or system dependencies! +``` + +### CLI + +A CLI interface is available as [`cramjam-cli`](https://pypi.org/project/cramjam-cli) + +--- + +Extremely thin Python bindings to de/compression algorithms in Rust. 
+Allows for using algorithms such as Snappy, without any system dependencies. + +This is handy when being used in environments like AWS Lambda, where installing +packages like `python-snappy` becomes difficult because of system level dependencies. + +--- + +##### Benchmarks + +Some basic benchmarks are available [in the benchmarks directory](./benchmarks/README.md) + +--- + +Available algorithms: + +- [X] Snappy (cramjam.snappy) +- [X] Brotli (cramjam.brotli) +- [X] Bzip2 (cramjam.bzip2) +- [X] Lz4 (cramjam.lz4) +- [X] Gzip (cramjam.gzip) +- [X] Deflate (cramjam.deflate) +- [X] ZSTD (cramjam.zstd) +- [X] XZ / LZMA (cramjam.xz) +- [X] Blosc2 (cramjam.experimental.blosc2) + +All available for use as: + +```python +>>> import cramjam +>>> import numpy as np +>>> compressed = cramjam.snappy.compress(b"bytes here") +>>> decompressed = cramjam.snappy.decompress(compressed) +>>> decompressed +cramjam.Buffer(len=10) # an object which implements the buffer protocol +>>> bytes(decompressed) +b"bytes here" +>>> np.frombuffer(decompressed, dtype=np.uint8) +array([ 98, 121, 116, 101, 115, 32, 104, 101, 114, 101], dtype=uint8) +``` + +Where the API is `cramjam.<compression-variant>.compress/decompress` and accepts +`bytes`/`bytearray`/`numpy.array`/`cramjam.File`/`cramjam.Buffer` objects. + +**de/compress_into** +Additionally, all variants support `decompress_into` and `compress_into`. +Ex. +```python +>>> import numpy as np +>>> from cramjam import snappy, Buffer +>>> +>>> data = np.frombuffer(b'some bytes here', dtype=np.uint8) +>>> data +array([115, 111, 109, 101, 32, 98, 121, 116, 101, 115, 32, 104, 101, + 114, 101], dtype=uint8) +>>> +>>> compressed = Buffer() +>>> snappy.compress_into(data, compressed) +33 # 33 bytes written to compressed buffer +>>> +>>> compressed.tell() # Where is the buffer position? +33 # goodie! 
+>>> +>>> compressed.seek(0) # Go back to the start of the buffer so we can prepare to decompress +>>> decompressed = b'0' * len(data) # let's write to `bytes` as output +>>> decompressed +b'000000000000000' +>>> +>>> snappy.decompress_into(compressed, decompressed) +15 # 15 bytes written to decompressed +>>> decompressed +b'some bytes here' +``` + +**Special note!** +If you know the length of the de/compress output, you +can provide `output_len=<int>` to any `de/compress` +to get ~1.5-3x performance increase as this allows single +buffer allocation; doesn't really apply if you're using `cramjam.Buffer` +or `cramjam.File` objects. diff --git a/cramjam-python/benchmark-requirements.txt b/benchmark-requirements.txt similarity index 100% rename from cramjam-python/benchmark-requirements.txt rename to benchmark-requirements.txt diff --git a/cramjam-python/benchmarks/README.md b/benchmarks/README.md similarity index 100% rename from cramjam-python/benchmarks/README.md rename to benchmarks/README.md diff --git a/cramjam-python/benchmarks/data/COPYING b/benchmarks/data/COPYING similarity index 100% rename from cramjam-python/benchmarks/data/COPYING rename to benchmarks/data/COPYING diff --git a/cramjam-python/benchmarks/data/Mark.Twain-Tom.Sawyer.txt.bz2 b/benchmarks/data/Mark.Twain-Tom.Sawyer.txt.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/Mark.Twain-Tom.Sawyer.txt.bz2 rename to benchmarks/data/Mark.Twain-Tom.Sawyer.txt.bz2 diff --git a/cramjam-python/benchmarks/data/alice29.txt.bz2 b/benchmarks/data/alice29.txt.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/alice29.txt.bz2 rename to benchmarks/data/alice29.txt.bz2 diff --git a/cramjam-python/benchmarks/data/asyoulik.txt.bz2 b/benchmarks/data/asyoulik.txt.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/asyoulik.txt.bz2 rename to benchmarks/data/asyoulik.txt.bz2 diff --git a/cramjam-python/benchmarks/data/dickens.bz2 b/benchmarks/data/dickens.bz2 similarity 
index 100% rename from cramjam-python/benchmarks/data/dickens.bz2 rename to benchmarks/data/dickens.bz2 diff --git a/cramjam-python/benchmarks/data/fireworks.jpeg.bz2 b/benchmarks/data/fireworks.jpeg.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/fireworks.jpeg.bz2 rename to benchmarks/data/fireworks.jpeg.bz2 diff --git a/cramjam-python/benchmarks/data/geo.protodata.bz2 b/benchmarks/data/geo.protodata.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/geo.protodata.bz2 rename to benchmarks/data/geo.protodata.bz2 diff --git a/cramjam-python/benchmarks/data/html.bz2 b/benchmarks/data/html.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/html.bz2 rename to benchmarks/data/html.bz2 diff --git a/cramjam-python/benchmarks/data/html_x_4.bz2 b/benchmarks/data/html_x_4.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/html_x_4.bz2 rename to benchmarks/data/html_x_4.bz2 diff --git a/cramjam-python/benchmarks/data/kppkn.gtb.bz2 b/benchmarks/data/kppkn.gtb.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/kppkn.gtb.bz2 rename to benchmarks/data/kppkn.gtb.bz2 diff --git a/cramjam-python/benchmarks/data/lcet10.txt.bz2 b/benchmarks/data/lcet10.txt.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/lcet10.txt.bz2 rename to benchmarks/data/lcet10.txt.bz2 diff --git a/cramjam-python/benchmarks/data/mozilla.bz2 b/benchmarks/data/mozilla.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/mozilla.bz2 rename to benchmarks/data/mozilla.bz2 diff --git a/cramjam-python/benchmarks/data/mr.bz2 b/benchmarks/data/mr.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/mr.bz2 rename to benchmarks/data/mr.bz2 diff --git a/cramjam-python/benchmarks/data/nci.bz2 b/benchmarks/data/nci.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/nci.bz2 rename to benchmarks/data/nci.bz2 diff --git a/cramjam-python/benchmarks/data/ooffice.bz2 
b/benchmarks/data/ooffice.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/ooffice.bz2 rename to benchmarks/data/ooffice.bz2 diff --git a/cramjam-python/benchmarks/data/osdb.bz2 b/benchmarks/data/osdb.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/osdb.bz2 rename to benchmarks/data/osdb.bz2 diff --git a/cramjam-python/benchmarks/data/paper-100k.pdf.bz2 b/benchmarks/data/paper-100k.pdf.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/paper-100k.pdf.bz2 rename to benchmarks/data/paper-100k.pdf.bz2 diff --git a/cramjam-python/benchmarks/data/plrabn12.txt.bz2 b/benchmarks/data/plrabn12.txt.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/plrabn12.txt.bz2 rename to benchmarks/data/plrabn12.txt.bz2 diff --git a/cramjam-python/benchmarks/data/reymont.bz2 b/benchmarks/data/reymont.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/reymont.bz2 rename to benchmarks/data/reymont.bz2 diff --git a/cramjam-python/benchmarks/data/samba.bz2 b/benchmarks/data/samba.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/samba.bz2 rename to benchmarks/data/samba.bz2 diff --git a/cramjam-python/benchmarks/data/sao.bz2 b/benchmarks/data/sao.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/sao.bz2 rename to benchmarks/data/sao.bz2 diff --git a/cramjam-python/benchmarks/data/urls.10K.bz2 b/benchmarks/data/urls.10K.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/urls.10K.bz2 rename to benchmarks/data/urls.10K.bz2 diff --git a/cramjam-python/benchmarks/data/webster.bz2 b/benchmarks/data/webster.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/webster.bz2 rename to benchmarks/data/webster.bz2 diff --git a/cramjam-python/benchmarks/data/x-ray.bz2 b/benchmarks/data/x-ray.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/x-ray.bz2 rename to benchmarks/data/x-ray.bz2 diff --git 
a/cramjam-python/benchmarks/data/xml.bz2 b/benchmarks/data/xml.bz2 similarity index 100% rename from cramjam-python/benchmarks/data/xml.bz2 rename to benchmarks/data/xml.bz2 diff --git a/cramjam-python/benchmarks/test_bench.py b/benchmarks/test_bench.py similarity index 100% rename from cramjam-python/benchmarks/test_bench.py rename to benchmarks/test_bench.py diff --git a/cramjam-python/build.rs b/build.rs similarity index 100% rename from cramjam-python/build.rs rename to build.rs diff --git a/cramjam-cli/Cargo.toml b/cramjam-cli/Cargo.toml deleted file mode 100644 index 64ab9d09..00000000 --- a/cramjam-cli/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -name = "cramjam-cli" -version = "0.2.0-rc1" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[features] -default = ["blosc2-shared", "blosc2-static"] -extension-module = [] # only here for uniformity w/ cramjam-python's feat set -use-system-blosc2 = ["libcramjam/use-system-blosc2"] -blosc2-static = ["libcramjam/blosc2-static"] -blosc2-shared = ["libcramjam/blosc2-shared"] - -[dependencies] -clap = { version = "^4.2", features = ["derive"] } -bytesize = "^1" -libcramjam = { path = "../libcramjam" } -# libcramjam = "0.3.0" diff --git a/cramjam-cli/LICENSE b/cramjam-cli/LICENSE deleted file mode 120000 index ea5b6064..00000000 --- a/cramjam-cli/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/cramjam-cli/README.md b/cramjam-cli/README.md deleted file mode 100644 index 6cf0edfd..00000000 --- a/cramjam-cli/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# cramjam-cli - -[![CI](https://github.com/milesgranger/pyrus-cramjam/workflows/CI/badge.svg?branch=master)](https://github.com/milesgranger/pyrus-cramjam/actions?query=branch=master) -[![PyPI](https://img.shields.io/pypi/v/cramjam-cli.svg)](https://pypi.org/project/cramjam-cli) -[![Anaconda-Server 
Badge](https://anaconda.org/conda-forge/cramjam-cli/badges/version.svg)](https://anaconda.org/conda-forge/cramjam-cli) -[![Downloads](https://pepy.tech/badge/cramjam-cli/month)](https://pepy.tech/project/cramjam-cli) - - -### Install (only via pip or conda for now) -```commandline -pip install --upgrade cramjam-cli # Requires no Python or system dependencies! -``` - ---- - -Simple CLI to a variety of compression algorithms - ---- - -Available algorithms: - -- [X] snappy -- [X] brotli -- [X] bzip2 -- [X] lz4 -- [X] gzip -- [X] deflate -- [X] zstd - -All available for use as: - -```bash -cramjam-cli snappy compress --input myfile.txt --output myfile.txt.snappy -cramjam-cli lz4 compress --input myfile.txt # omitting --output will write to stdout -cat myfile.txt | cramjam-cli zstd compress --output myfile.txt.zstd # omitting --input will read from stdin -``` diff --git a/cramjam-cli/pyproject.toml b/cramjam-cli/pyproject.toml deleted file mode 100644 index c06a2b70..00000000 --- a/cramjam-cli/pyproject.toml +++ /dev/null @@ -1,26 +0,0 @@ -[project] -name = "cramjam-cli" -keywords = ["compression", "decompression", "snappy", "zstd", "bz2", "gzip", "lz4", "brotli", "deflate"] -requires-python = ">=3.8" -license = {file = "LICENSE"} - -[project.urls] -homepage = "https://github.com/milesgranger/pyrus-cramjam" -documentation = "https://docs.rs/cramjam/latest/cramjam" -repository = "https://github.com/milesgranger/pyrus-cramjam" - -[build-system] -requires = ["maturin>=0.14"] -build-backend = "maturin" - -[tool.maturin] -bindings = "bin" -strip = true - -[project.optional-dependencies] -dev = [ - "pytest>=5.30", - "pytest-benchmark", - "cramjam", - "hypothesis" -] diff --git a/cramjam-cli/src/main.rs b/cramjam-cli/src/main.rs deleted file mode 100644 index c1141957..00000000 --- a/cramjam-cli/src/main.rs +++ /dev/null @@ -1,199 +0,0 @@ -use std::any::Any; -use std::fs::File; -use std::io; -use std::io::{Cursor, Read, StdoutLock, Write}; -use std::time::{Duration, Instant}; 
- -use bytesize::ByteSize; -use clap::{Args, Parser, Subcommand, ValueEnum}; - -#[derive(Clone, Parser)] -#[command(author, version, about)] -#[command(after_long_help = "Example: cramjam-cli snappy compress --input myfile.txt --output out.txt.snappy")] -struct Cli { - #[command(subcommand)] - codec: Codec, - #[arg(short, long, global = true, help = "Input file, if not set will read from stdin")] - input: Option, - #[arg(short, long, global = true, help = "Output file, if not set will write to stdout")] - output: Option, - #[arg(short, long, global = true, help = "Remove all informational output", action = clap::ArgAction::SetTrue)] - quiet: bool, -} - -#[derive(Clone, Copy, ValueEnum)] -enum Action { - Compress, - Decompress, -} - -// TODO: Config per algorithm, matching it's specific possible parameters (level, speed, block, etc) -#[derive(Args, Clone)] -struct Config { - #[arg(value_enum)] - action: Action, - #[arg(short, long, help = "Level, if relevant to the algorithm")] - level: Option, -} - -#[derive(Clone, Subcommand)] -enum Codec { - Lz4(Config), - Snappy(Config), - ZSTD(Config), - Brotli(Config), - Gzip(Config), - Deflate(Config), - Bzip2(Config), - Blosc2(Config), -} - -trait ReadableDowncast: Read + Any { - fn as_any(&self) -> &dyn Any; -} -impl ReadableDowncast for T { - fn as_any(&self) -> &dyn Any { - &*self - } -} -trait WritableDowncast: Write + Any { - fn as_any(&self) -> &dyn Any; - fn as_any_mut(&mut self) -> &mut dyn Any; -} -impl WritableDowncast for T { - fn as_any(&self) -> &dyn Any { - &*self - } - fn as_any_mut(&mut self) -> &mut dyn Any { - &mut *self - } -} - -#[derive(Debug)] -enum Error { - #[allow(dead_code)] - Other(String), -} - -impl From for Error { - fn from(err: String) -> Self { - Error::Other(err) - } -} -impl<'a> From<&'a str> for Error { - fn from(err: &'a str) -> Self { - Error::Other(err.to_string()) - } -} - -impl From for io::Error { - fn from(err: Error) -> io::Error { - io::Error::new(io::ErrorKind::Other, 
err.to_string()) - } -} -impl std::error::Error for Error {} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self) - } -} - -pub fn main() -> io::Result<()> { - let mut m = Cli::parse(); - - let input: Box = match m.input { - Some(path) => Box::new(File::open(path)?), - None => Box::new(std::io::stdin().lock()), - }; - let mut output: Box = match m.output { - Some(path) => Box::new(File::create(path)?), - None => { - m.quiet = true; // Don't echo anything in stdout that isn't actual data output - Box::new(std::io::stdout().lock()) - } - }; - - // if input is a file, then we can probably get the input length for stats - let maybe_len = (&*input) - .as_any() - .downcast_ref::() - .map(|file| file.metadata().ok().map(|m| m.len()).unwrap_or_default()); - - let start = Instant::now(); - let nbytes = match m.codec { - Codec::Blosc2(conf) => match conf.action { - Action::Compress => libcramjam::blosc2::compress(input, &mut output), - Action::Decompress => libcramjam::blosc2::decompress(input, &mut output), - }, - Codec::Snappy(conf) => match conf.action { - Action::Compress => libcramjam::snappy::compress(input, &mut output), - Action::Decompress => libcramjam::snappy::decompress(input, &mut output), - }, - Codec::Lz4(conf) => { - match conf.action { - Action::Compress => { - // TODO: lz4 doesn't impl Read for their Encoder, so cannot determine - // number of bytes compressed without using Seek, which stdout doesn't have, - // as it's streaming. So here, we'll go ahead and read everything in then - // send it in as a cursor, file can remain as is. - // When lz4 implements Reader for the Encoder, then all this can go away. 
- // along with the `Seek` trait bound on the internal::compress function - if let Some(stdout) = ((&mut *output).as_any_mut()).downcast_mut::() { - let mut data = vec![]; - libcramjam::lz4::compress(input, &mut Cursor::new(&mut data), conf.level.map(|v| v as _))?; - io::copy(&mut Cursor::new(data), stdout).map(|v| v as usize) - } else { - match ((&mut *output).as_any_mut()).downcast_mut::() { - Some(file) => libcramjam::lz4::compress(input, file, conf.level.map(|v| v as _)), - None => unreachable!("Did we implement something other than Stdout and File for output?"), - } - } - } - Action::Decompress => libcramjam::lz4::decompress(input, &mut output), - } - } - Codec::Bzip2(conf) => match conf.action { - Action::Compress => libcramjam::bzip2::compress(input, &mut output, conf.level.map(|v| v as _)), - Action::Decompress => libcramjam::bzip2::decompress(input, &mut output), - }, - Codec::Gzip(conf) => match conf.action { - Action::Compress => libcramjam::gzip::compress(input, &mut output, conf.level.map(|v| v as _)), - Action::Decompress => libcramjam::gzip::decompress(input, &mut output), - }, - Codec::ZSTD(conf) => match conf.action { - Action::Compress => libcramjam::zstd::compress(input, &mut output, conf.level.map(|v| v as _)), - Action::Decompress => libcramjam::zstd::decompress(input, &mut output), - }, - Codec::Deflate(conf) => match conf.action { - Action::Compress => libcramjam::deflate::compress(input, &mut output, conf.level.map(|v| v as _)), - Action::Decompress => libcramjam::deflate::decompress(input, &mut output), - }, - Codec::Brotli(conf) => match conf.action { - Action::Compress => libcramjam::brotli::compress(input, &mut output, conf.level.map(|v| v as _)), - Action::Decompress => libcramjam::brotli::decompress(input, &mut output), - }, - }?; - let duration = start.elapsed(); - - if !m.quiet { - if let Some(len) = maybe_len { - println!("Input: {}", ByteSize(len as _)); - println!("Output: {}", ByteSize(nbytes as _)); - println!("Change: {:.2}%", 
((nbytes as f32 - len as f32) / len as f32) * 100.,); - println!("Ratio: {:.2}", (len as f32 / nbytes as f32)); - println!("Throughput: {}/sec", calc_throughput_sec(duration, len as _)); - } - } - Ok(()) -} - -fn calc_throughput_sec(duration: Duration, nbytes: usize) -> ByteSize { - if duration.as_millis() > 0 { - ByteSize(((nbytes as u128 / (duration.as_millis())) as u64) * 1_000) - } else if duration.as_micros() > 0 { - ByteSize(((nbytes as u128 / (duration.as_micros())) as u64) * 10_000) - } else { - ByteSize(((nbytes as u128 / (duration.as_nanos())) as u64) * 100_000) - } -} diff --git a/cramjam-cli/tests/test_cli.py b/cramjam-cli/tests/test_cli.py deleted file mode 100644 index 48a4d28f..00000000 --- a/cramjam-cli/tests/test_cli.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -import subprocess -import tempfile -import pathlib -from datetime import timedelta - -import pytest -from hypothesis import strategies as st, given, settings - -import cramjam - -VARIANTS = ("snappy", "brotli", "bzip2", "lz4", "gzip", "deflate", "zstd") - -# TODO: after blosc2 is moved out of cramjam experimental -if not hasattr(cramjam, "blosc2") and hasattr(cramjam, "experimental") and hasattr(cramjam.experimental, "blosc2"): - cramjam.blosc2 = cramjam.experimental.blosc2 - VARIANTS = (*VARIANTS, "blosc2") - -# Some OS can be slow or have higher variability in their runtimes on CI -settings.register_profile("local", deadline=timedelta(milliseconds=1000)) -settings.register_profile("CI", deadline=None, max_examples=10) -if os.getenv("CI"): - settings.load_profile("CI") -else: - settings.load_profile("local") - - -def run_command(cmd) -> bytes: - return subprocess.check_output(cmd.split(), stderr=subprocess.STDOUT) - - -@given(data=st.binary(min_size=1)) -@pytest.mark.parametrize("variant", VARIANTS) -def test_cli_file_to_file(data, variant): - - with tempfile.TemporaryDirectory() as tmpdir: - infile = pathlib.Path(tmpdir).joinpath("input.txt") - infile.write_bytes(data) - - compressed_file 
= pathlib.Path(tmpdir).joinpath(f"input.txt.{variant}") - - cmd = f"cramjam-cli {variant} compress --input {infile} --output {compressed_file}" - run_command(cmd) - - expected = bytes(getattr(cramjam, variant).compress(data)) - if variant != "blosc2": - assert expected == compressed_file.read_bytes() - - decompressed_file = pathlib.Path(tmpdir).joinpath("decompressed.txt") - run_command( - f"cramjam-cli {variant} decompress --input {compressed_file} --output {decompressed_file}" - ) - assert data == decompressed_file.read_bytes() - - -@given(data=st.binary(min_size=1)) -@pytest.mark.parametrize("variant", VARIANTS) -def test_cli_file_to_stdout(data, variant): - - with tempfile.TemporaryDirectory() as tmpdir: - infile = pathlib.Path(tmpdir).joinpath("input.txt") - infile.write_bytes(data) - - cmd = f"cramjam-cli {variant} compress --input {infile}" - out = run_command(cmd) - - expected = bytes(getattr(cramjam, variant).compress(data)) - if variant != "blosc2": - assert expected == out - - compressed = pathlib.Path(tmpdir).joinpath(f"compressed.txt.{variant}") - compressed.write_bytes(expected) - - cmd = f"cramjam-cli {variant} decompress --input {compressed}" - out = run_command(cmd) - assert out == data diff --git a/cramjam-python/.readthedocs.yaml b/cramjam-python/.readthedocs.yaml deleted file mode 100644 index 4857411f..00000000 --- a/cramjam-python/.readthedocs.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# https://docs.readthedocs.io/en/stable/config-file/v2.html#supported-settings - -version: 2 - -sphinx: - builder: html - -build: - os: "ubuntu-20.04" - tools: - python: "3.9" - rust: "1.67" - -python: - install: - - requirements: docs/requirements.txt - - method: pip - path: . 
diff --git a/cramjam-python/Cargo.toml b/cramjam-python/Cargo.toml deleted file mode 100644 index 92885731..00000000 --- a/cramjam-python/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ -[package] -name = "cramjam-python" -version = "2.8.4-rc1" -authors = ["Miles Granger "] -edition = "2021" -license = "MIT" -description = "Thin Python bindings to de/compression algorithms in Rust" -readme = "README.md" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[lib] -name = "cramjam" -crate-type = ["cdylib"] - -[features] -default = ["extension-module"] -extension-module = ["pyo3/extension-module"] -generate-import-lib = ["pyo3/generate-import-lib"] # needed for Windows PyPy builds - -blosc2-static = ["libcramjam/blosc2-static"] -blosc2-shared = ["libcramjam/blosc2-shared"] -use-system-blosc2 = ["libcramjam/use-system-blosc2"] - - -[dependencies] -pyo3 = { version = "^0.20", default-features = false, features = ["macros"] } -libcramjam = { version = "^0.4" } - -[build-dependencies] -pyo3-build-config = "^0.20" diff --git a/cramjam-python/LICENSE b/cramjam-python/LICENSE deleted file mode 120000 index ea5b6064..00000000 --- a/cramjam-python/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/cramjam-python/README.md b/cramjam-python/README.md deleted file mode 100644 index 4740f4b2..00000000 --- a/cramjam-python/README.md +++ /dev/null @@ -1,101 +0,0 @@ -# cramjam-python - -[![Code Style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) -[![CI](https://github.com/milesgranger/cramjam/workflows/CI-python/badge.svg?branch=master)](https://github.com/milesgranger/cramjam/actions?query=branch=master) -[![PyPI](https://img.shields.io/pypi/v/cramjam.svg)](https://pypi.org/project/cramjam) -[![Anaconda-Server Badge](https://anaconda.org/conda-forge/cramjam/badges/version.svg)](https://anaconda.org/conda-forge/cramjam) 
-[![Downloads](https://pepy.tech/badge/cramjam/month)](https://pepy.tech/project/cramjam) - -[API Documentation](https://docs.rs/cramjam) - -### Install -```commandline -pip install --upgrade cramjam # Requires no Python or system dependencies! -``` - -### CLI - -A CLI interface is available as [`cramjam-cli`](./../cramjam-cli) - ---- - -Extremely thin Python bindings to de/compression algorithms in Rust. -Allows for using algorithms such as Snappy, without any system dependencies. - -This is handy when being used in environments like AWS Lambda, where installing -packages like `python-snappy` becomes difficult because of system level dependencies. - ---- - -##### Benchmarks - -Some basic benchmarks are available [in the benchmarks directory](./benchmarks/README.md) - ---- - -Available algorithms: - -- [X] Snappy (cramjam.snappy) -- [X] Brotli (cramjam.brotli) -- [X] Bzip2 (cramjam.bzip2) -- [X] Lz4 (cramjam.lz4) -- [X] Gzip (cramjam.gzip) -- [X] Deflate (cramjam.deflate) -- [X] ZSTD (cramjam.zstd) -- [X] XZ / LZMA (cramjam.xz) -- [X] Blosc2 (cramjam.experimental.blosc2) - -All available for use as: - -```python ->>> import cramjam ->>> import numpy as np ->>> compressed = cramjam.snappy.compress(b"bytes here") ->>> decompressed = cramjam.snappy.decompress(compressed) ->>> decompressed -cramjam.Buffer(len=10) # an object which implements the buffer protocol ->>> bytes(decompressed) -b"bytes here" ->>> np.frombuffer(decompressed, dtype=np.uint8) -array([ 98, 121, 116, 101, 115, 32, 104, 101, 114, 101], dtype=uint8) -``` - -Where the API is `cramjam..compress/decompress` and accepts -`bytes`/`bytearray`/`numpy.array`/`cramjam.File`/`cramjam.Buffer` objects. - -**de/compress_into** -Additionally, all variants support `decompress_into` and `compress_into`. -Ex. 
-```python ->>> import numpy as np ->>> from cramjam import snappy, Buffer ->>> ->>> data = np.frombuffer(b'some bytes here', dtype=np.uint8) ->>> data -array([115, 111, 109, 101, 32, 98, 121, 116, 101, 115, 32, 104, 101, - 114, 101], dtype=uint8) ->>> ->>> compressed = Buffer() ->>> snappy.compress_into(data, compressed) -33 # 33 bytes written to compressed buffer ->>> ->>> compressed.tell() # Where is the buffer position? -33 # goodie! ->>> ->>> compressed.seek(0) # Go back to the start of the buffer so we can prepare to decompress ->>> decompressed = b'0' * len(data) # let's write to `bytes` as output ->>> decompressed -b'000000000000000' ->>> ->>> snappy.decompress_into(compressed, decompressed) -15 # 15 bytes written to decompressed ->>> decompressed -b'some bytes here' -``` - -**Special note!** -If you know the length of the de/compress output, you -can provide `output_len=<>` to any `de/compress` -to get ~1.5-3x performance increase as this allows single -buffer allocation; doesn't really apply if you're using `cramjam.Buffer` -or `cramjam.File` objects. 
diff --git a/cramjam-python/docs/index.md b/docs/index.md similarity index 100% rename from cramjam-python/docs/index.md rename to docs/index.md diff --git a/libcramjam/Cargo.toml b/libcramjam/Cargo.toml deleted file mode 100644 index a6d70a84..00000000 --- a/libcramjam/Cargo.toml +++ /dev/null @@ -1,54 +0,0 @@ -[package] -name = "libcramjam" -version = "0.4.0" -edition = "2021" -license = "MIT" -description = "Compression library combining a plethora of algorithms in a similar as possible API" -readme = "README.md" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[features] -default = ["snappy", "lz4", "bzip2", "brotli", "xz", "zstd", "gzip", "deflate", "blosc2"] -capi = ["dep:libc"] -snappy = ["dep:snap"] -lz4 = ["dep:lz4"] -bzip2 = ["dep:bzip2"] -brotli = ["dep:brotli"] -xz = ["dep:xz2"] -zstd = ["dep:zstd"] -gzip = ["dep:libdeflater", "dep:libdeflate-sys", "dep:flate2"] -deflate = ["dep:libdeflater", "dep:libdeflate-sys", "dep:flate2"] -blosc2 = ["dep:blosc2-rs"] -blosc2-static = ["blosc2", "blosc2-rs/static"] -blosc2-shared = ["blosc2", "blosc2-rs/shared"] -use-system-blosc2 = ["blosc2", "blosc2-rs/use-system-blosc2"] - -[dependencies] -libc = { version = "0.2", optional = true } -snap = { version = "^1", optional = true } -brotli = { version = "^3", default-features = false, features = ["std", "ffi-api"], optional = true } -bzip2 = { version = "^0.4", optional = true } -lz4 = { version = "^1", optional = true } -flate2 = { version = "^1", optional = true } -libdeflater = { version = "^1", optional = true } -libdeflate-sys = { version = "<1.20.0", optional = true } # TODO: requires gcc>=4.9 not available on Python's CI wheel builds -blosc2-rs = { version = "0.2.3+2.14.3", optional = true } -zstd = { version = "^0.13", optional = true } -xz2 = { version = "0.1.7", features = ["static"], optional = true } - -[build-dependencies] -cbindgen = "^0.24" - -[dev-dependencies] -inline-c = "0.1" - 
-[package.metadata.capi.pkg_config] -strip_include_path_components = 1 - -[package.metadata.capi.library] -rustflags = "-Cpanic=abort" -name = "cramjam" - -[package.metadata.capi.header] -name = "cramjam" -subdirectory = "cramjam" diff --git a/libcramjam/LICENSE b/libcramjam/LICENSE deleted file mode 120000 index ea5b6064..00000000 --- a/libcramjam/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/libcramjam/README.md b/libcramjam/README.md deleted file mode 100644 index a796b6d8..00000000 --- a/libcramjam/README.md +++ /dev/null @@ -1,11 +0,0 @@ - -# cramjam library - -A Rust library combining different compression algorithms/libraries in a common (as possible) API. - - -Features: - -- `capi`: Build a C-ABI library. Compatible with [`cargo-c`](https://github.com/lu-zero/cargo-c) - -Pre-compiled libraries available on [![Anaconda-Server Badge](https://anaconda.org/conda-forge/libcramjam/badges/version.svg)](https://anaconda.org/conda-forge/libcramjam) diff --git a/libcramjam/cbindgen.toml b/libcramjam/cbindgen.toml deleted file mode 100644 index 38867ee0..00000000 --- a/libcramjam/cbindgen.toml +++ /dev/null @@ -1,4 +0,0 @@ -language = "C" -cpp_compat = true -include_version = true -namespace = "cramjam" diff --git a/libcramjam/src/blosc2.rs b/libcramjam/src/blosc2.rs deleted file mode 100644 index ff0c6b23..00000000 --- a/libcramjam/src/blosc2.rs +++ /dev/null @@ -1,80 +0,0 @@ -//! 
snappy de/compression interface - -use ::blosc2::CParams; -pub use blosc2; -use std::io::{self, BufReader, Read, Write}; - -// TODO: Could downcast to check for file, then use file-backed SChunk - -/// Compress using Blosc2 SChunk -pub fn compress(rdr: R, wtr: &mut W) -> io::Result -where - R: Read, - W: Write + ?Sized, -{ - let mut schunk = blosc2::schunk::SChunk::new( - blosc2::schunk::Storage::default() - .set_contiguous(true) - .set_cparams(&mut CParams::default()) - .set_dparams(&mut Default::default()), - ); - let mut rdr = BufReader::new(rdr); - - // stream compress into schunk - io::copy(&mut rdr, &mut schunk)?; - - let buf = schunk.into_vec()?; - wtr.write_all(&buf)?; - Ok(buf.len()) -} - -/// Decompress, assumed reader will be giving a SChunk compatible input -pub fn decompress(input: R, output: &mut W) -> io::Result -where - R: Read, - W: Write + ?Sized, -{ - // TODO: Avoid the double copy somehow - let mut buf = vec![]; - io::copy(&mut BufReader::new(input), &mut buf)?; - - let mut schunk = blosc2::schunk::SChunk::from_vec(buf)?; - let mut decoder = blosc2::schunk::SChunkDecoder::new(&mut schunk); - io::copy(&mut decoder, output).map(|v| v as usize) -} - -#[inline(always)] -pub fn compress_chunk(input: &[T]) -> io::Result> { - let buf = blosc2::compress(input, None, None, None, None)?; - Ok(buf) -} - -pub fn compress_chunk_into(input: &[T], output: &mut [u8]) -> io::Result { - let nbytes = blosc2::compress_into(input, output, None, None, None, None)?; - Ok(nbytes) -} - -#[inline(always)] -pub fn decompress_chunk(input: &[u8]) -> io::Result> { - let buf = blosc2::decompress(input)?; - Ok(buf) -} - -pub fn decompress_chunk_into(input: &[u8], output: &mut [T]) -> io::Result { - let nbytes = blosc2::decompress_into(input, output)?; - Ok(nbytes) -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use super::*; - - #[test] - fn test_compress() { - let mut compressed = vec![]; - let data = b"bytes"; - assert!(compress(Cursor::new(data), &mut 
compressed).is_ok()); - } -} diff --git a/libcramjam/src/brotli.rs b/libcramjam/src/brotli.rs deleted file mode 100644 index f4d9734d..00000000 --- a/libcramjam/src/brotli.rs +++ /dev/null @@ -1,36 +0,0 @@ -//! brotli de/compression interface -use std::io::Write; - -const DEFAULT_COMPRESSION_LEVEL: u32 = 11; -const BUF_SIZE: usize = 1 << 17; // Taken from brotli kCompressFragementTwoPassBlockSize -const LGWIN: u32 = 22; - -pub use brotli; -use std::io::prelude::*; -use std::io::Error; - -/// Decompress via Brotli -#[inline(always)] -pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = brotli::Decompressor::new(input, BUF_SIZE); - let n_bytes = std::io::copy(&mut decoder, output)?; - Ok(n_bytes as usize) -} - -/// Compress via Brotli -#[inline(always)] -pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - let mut encoder = brotli::CompressorReader::new(input, BUF_SIZE, level, LGWIN); - let n_bytes = std::io::copy(&mut encoder, output)?; - Ok(n_bytes as usize) -} - -pub fn make_write_compressor(w: W, level: Option) -> brotli::CompressorWriter { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - brotli::CompressorWriter::new(w, BUF_SIZE, level, LGWIN) -} - -pub fn compress_bound(input_len: usize) -> usize { - brotli::ffi::compressor::BrotliEncoderMaxCompressedSize(input_len) -} diff --git a/libcramjam/src/bzip2.rs b/libcramjam/src/bzip2.rs deleted file mode 100644 index 9022dfc2..00000000 --- a/libcramjam/src/bzip2.rs +++ /dev/null @@ -1,25 +0,0 @@ -//! 
bzip2 de/compression interface -use std::io::prelude::*; -use std::io::Error; - -const DEFAULT_COMPRESSION_LEVEL: u32 = 6; - -pub use bzip2; -use bzip2::read::{BzEncoder, MultiBzDecoder}; - -/// Decompress via bzip2 -#[inline(always)] -pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = MultiBzDecoder::new(input); - let n_bytes = std::io::copy(&mut decoder, output)?; - Ok(n_bytes as usize) -} - -/// Compress via bzip2 -#[inline(always)] -pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - let mut encoder = BzEncoder::new(input, bzip2::Compression::new(level)); - let n_bytes = std::io::copy(&mut encoder, output)?; - Ok(n_bytes as usize) -} diff --git a/libcramjam/src/capi.rs b/libcramjam/src/capi.rs deleted file mode 100644 index 8a345608..00000000 --- a/libcramjam/src/capi.rs +++ /dev/null @@ -1,1066 +0,0 @@ -use libc::c_void; - -use std::ffi::{c_char, CString}; -use std::io::Cursor; -use std::io::Write; -use std::slice; - -#[cfg(feature = "brotli")] -use crate::brotli; -#[cfg(feature = "bzip2")] -use crate::bzip2; -#[cfg(feature = "deflate")] -use crate::deflate; -#[cfg(feature = "gzip")] -use crate::gzip; -#[cfg(feature = "lz4")] -use crate::lz4; -#[cfg(feature = "snappy")] -use crate::snappy; -#[cfg(feature = "zstd")] -use crate::zstd; - -#[repr(C)] -pub struct Buffer { - data: *const u8, - len: usize, - owned: bool, -} - -impl Buffer { - pub fn empty() -> Self { - Buffer { - data: std::ptr::null(), - len: 0, - owned: false, - } - } -} - -impl From<&Vec> for Buffer { - fn from(v: &Vec) -> Self { - Buffer { - data: v.as_ptr(), - len: v.len(), - owned: false, - } - } -} -impl From> for Buffer { - fn from(mut v: Vec) -> Self { - v.shrink_to_fit(); - let buffer = Buffer { - data: v.as_ptr(), - len: v.len(), - owned: true, - }; - std::mem::forget(v); - buffer - } -} - -/// All codecs supported by the de/compress and de/compress_into APIs -#[derive(Debug, Copy, 
Clone)] -#[repr(C)] -pub enum Codec { - #[cfg(feature = "snappy")] - #[allow(dead_code)] - Snappy, - - #[cfg(feature = "snappy")] - #[allow(dead_code)] - SnappyRaw, - - #[cfg(feature = "bzip2")] - #[allow(dead_code)] - Bzip2, - - #[cfg(feature = "lz4")] - #[allow(dead_code)] - Lz4, - - #[cfg(feature = "lz4")] - #[allow(dead_code)] - Lz4Block, - - #[cfg(feature = "zstd")] - #[allow(dead_code)] - Zstd, - - #[cfg(feature = "gzip")] - #[allow(dead_code)] - Gzip, - - #[cfg(feature = "brotli")] - #[allow(dead_code)] - Brotli, -} - -/// Streaming only codecs, which can create De/Compressors using the de/compressor APIs -#[derive(Debug)] -#[repr(C)] -pub enum StreamingCodec { - #[cfg(feature = "bzip2")] - #[allow(dead_code)] - StreamingBzip2, - - #[cfg(feature = "snappy")] - #[allow(dead_code)] - StreamingSnappy, - - #[cfg(feature = "lz4")] - #[allow(dead_code)] - StreamingLz4, - - #[cfg(feature = "zstd")] - #[allow(dead_code)] - StreamingZstd, - - #[cfg(feature = "gzip")] - #[allow(dead_code)] - StreamingGzip, - - #[cfg(feature = "brotli")] - #[allow(dead_code)] - StreamingBrotli, -} - -#[cfg(feature = "snappy")] -type SnappyFrameCompressor = snappy::snap::write::FrameEncoder>; -#[cfg(feature = "bzip2")] -type Bzip2Compressor = bzip2::bzip2::write::BzEncoder>; -#[cfg(feature = "lz4")] -type Lz4Compressor = crate::lz4::lz4::Encoder>; -#[cfg(feature = "gzip")] -type GzipCompressor = crate::gzip::flate2::write::GzEncoder>; -#[cfg(feature = "brotli")] -type BrotliCompressor = brotli::brotli::CompressorWriter>; -#[cfg(feature = "zstd")] -type ZstdCompressor<'a> = crate::zstd::zstd::Encoder<'a, Vec>; - -type Decompressor = Cursor>; - -// Set the error string to a error message pointer -#[inline(always)] -fn error_to_ptr(err: impl ToString, ptr: &mut *mut c_char) { - let err_msg = CString::new(err.to_string()).unwrap(); - *ptr = err_msg.into_raw(); -} - -/// Safe to call on a nullptr -#[no_mangle] -pub extern "C" fn free_string(ptr: *mut c_char) { - if !ptr.is_null() { - let _ = 
unsafe { CString::from_raw(ptr) }; - } -} - -#[no_mangle] -pub extern "C" fn free_buffer(buf: Buffer) { - if !buf.data.is_null() && buf.owned { - let _ = unsafe { Vec::from_raw_parts(buf.data as *mut u8, buf.len, buf.len) }; - } -} - -#[no_mangle] -pub extern "C" fn decompress( - codec: Codec, - input: *const u8, - input_len: usize, - nbytes_read: &mut usize, - nbytes_written: &mut usize, - error: &mut *mut c_char, -) -> Buffer { - let mut decompressed = Cursor::new(vec![]); - let mut compressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); - let ret: Result = match codec { - #[cfg(feature = "snappy")] - Codec::Snappy => snappy::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "snappy")] - Codec::SnappyRaw => snappy::raw::decompress_vec(compressed.get_ref()).map(|v| { - let len = v.len(); - *decompressed.get_mut() = v; - decompressed.set_position(len as _); - compressed.set_position(input_len as _); // todo, assuming it read the whole thing - len - }), - #[cfg(feature = "bzip2")] - Codec::Bzip2 => bzip2::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "brotli")] - Codec::Brotli => brotli::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "gzip")] - Codec::Gzip => gzip::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "zstd")] - Codec::Zstd => zstd::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "lz4")] - Codec::Lz4 => lz4::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "lz4")] - Codec::Lz4Block => lz4::block::decompress_vec(compressed.get_ref()).map(|v| { - let len = v.len(); - *decompressed.get_mut() = v; - decompressed.set_position(len as _); - compressed.set_position(input_len as _); // todo, assuming it read the whole thing - len - }), - }; - match ret { - Ok(n) => { - *nbytes_read = compressed.position() as usize; - *nbytes_written = n; - match decompressed.flush() { - Ok(_) => Buffer::from(decompressed.into_inner()), - Err(err) 
=> { - error_to_ptr(err, error); - Buffer::empty() - } - } - } - Err(err) => { - error_to_ptr(err, error); - Buffer::empty() - } - } -} - -#[no_mangle] -pub extern "C" fn compress( - codec: Codec, - level: i32, - input: *const u8, - input_len: usize, - nbytes_read: &mut usize, - nbytes_written: &mut usize, - error: &mut *mut c_char, -) -> Buffer { - if level < 0 { - error_to_ptr("Requires compression >= 0", error); - return Buffer::empty(); - } - let level = Some(level); - let mut compressed = Cursor::new(vec![]); - let mut decompressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); - let ret: Result = match codec { - #[cfg(feature = "snappy")] - Codec::Snappy => snappy::compress(&mut decompressed, &mut compressed), - #[cfg(feature = "snappy")] - Codec::SnappyRaw => snappy::raw::compress_vec(decompressed.get_ref()).map(|v| { - let len = v.len(); - *compressed.get_mut() = v; - compressed.set_position(len as _); - decompressed.set_position(input_len as _); - len - }), - #[cfg(feature = "bzip2")] - Codec::Bzip2 => bzip2::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), - #[cfg(feature = "brotli")] - Codec::Brotli => brotli::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), - #[cfg(feature = "gzip")] - Codec::Gzip => gzip::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), - #[cfg(feature = "zstd")] - Codec::Zstd => zstd::compress(&mut decompressed, &mut compressed, level.map(|v: i32| v as i32)), - #[cfg(feature = "lz4")] - Codec::Lz4 => lz4::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), - // TODO: Support passing acceleration - #[cfg(feature = "lz4")] - Codec::Lz4Block => lz4::block::compress_vec(decompressed.get_ref(), level.map(|v| v as _), None, Some(true)) - .map(|v| { - let len = v.len(); - *compressed.get_mut() = v; - compressed.set_position(len as _); - decompressed.set_position(input_len as _); - len - }), // TODO - }; - match ret { - Ok(n) => { - 
*nbytes_read = decompressed.get_ref().len(); - *nbytes_written = n; - match compressed.flush() { - Ok(_) => Buffer::from(compressed.into_inner()), - Err(err) => { - error_to_ptr(err, error); - Buffer::empty() - } - } - } - Err(err) => { - error_to_ptr(err, error); - Buffer::empty() - } - } -} - -#[no_mangle] -pub extern "C" fn decompress_into( - codec: Codec, - input: *const u8, - input_len: usize, - output: *mut u8, - output_len: usize, - nbytes_read: &mut usize, - nbytes_written: &mut usize, - error: &mut *mut c_char, -) { - let mut compressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); - let mut decompressed = Cursor::new(unsafe { std::slice::from_raw_parts_mut(output, output_len) }); - - let ret: Result = match codec { - #[cfg(feature = "snappy")] - Codec::Snappy => snappy::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "snappy")] - Codec::SnappyRaw => snappy::raw::decompress(compressed.get_ref(), decompressed.get_mut()), - #[cfg(feature = "bzip2")] - Codec::Bzip2 => bzip2::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "brotli")] - Codec::Brotli => brotli::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "gzip")] - Codec::Gzip => gzip::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "zstd")] - Codec::Zstd => zstd::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "lz4")] - Codec::Lz4 => lz4::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "lz4")] - Codec::Lz4Block => lz4::block::decompress_into(&compressed.get_ref(), decompressed.get_mut(), None), - }; - match ret { - Ok(n) => { - *nbytes_written = n; - *nbytes_read = compressed.get_ref().len(); - } - Err(err) => { - error_to_ptr(err, error); - *nbytes_written = 0; - *nbytes_read = 0; - } - } -} - -#[no_mangle] -pub extern "C" fn compress_into( - codec: Codec, - level: i32, - input: *const u8, - input_len: usize, - output: *mut u8, - output_len: usize, - nbytes_read: &mut 
usize, - nbytes_written: &mut usize, - error: &mut *mut c_char, -) { - let mut decompressed = unsafe { std::slice::from_raw_parts(input, input_len) }; - let mut compressed = unsafe { std::slice::from_raw_parts_mut(output, output_len) }; - - if level < 0 { - error_to_ptr("Requires compression >= 0", error); - return; - } - let level = Some(level); - - let ret: Result = match codec { - #[cfg(feature = "snappy")] - Codec::Snappy => snappy::compress(&mut decompressed, &mut compressed), - #[cfg(feature = "snappy")] - Codec::SnappyRaw => snappy::raw::compress(decompressed, &mut compressed), - #[cfg(feature = "bzip2")] - Codec::Bzip2 => bzip2::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), - #[cfg(feature = "brotli")] - Codec::Brotli => brotli::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), - #[cfg(feature = "gzip")] - Codec::Gzip => gzip::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), - #[cfg(feature = "zstd")] - Codec::Zstd => zstd::compress(&mut decompressed, &mut compressed, level.map(|v: i32| v as i32)), - #[cfg(feature = "lz4")] - Codec::Lz4 => lz4::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), - // TODO: Support passing acceleration - #[cfg(feature = "lz4")] - Codec::Lz4Block => lz4::block::compress_into(decompressed, compressed, level.map(|v| v as _), None, Some(true)), - }; - match ret { - Ok(n) => { - *nbytes_written = n; - *nbytes_read = decompressed.len(); - } - Err(err) => { - error_to_ptr(err, error); - *nbytes_written = 0; - *nbytes_read = 0; - } - } -} - -/* ---------- Streaming Compressor --------------- */ -#[no_mangle] -#[allow(unused_variables)] -pub extern "C" fn compressor_init(codec: StreamingCodec, level: i32, error: &mut *mut c_char) -> *mut c_void { - match codec { - #[cfg(feature = "bzip2")] - StreamingCodec::StreamingBzip2 => { - if level < 0 { - error_to_ptr("Bzip2 requires compression level >= 0", error); - return std::ptr::null_mut(); - } - let 
compressor = bzip2::bzip2::write::BzEncoder::new(vec![], bzip2::bzip2::Compression::new(level as _)); - Box::into_raw(Box::new(compressor)) as _ - } - #[cfg(feature = "brotli")] - StreamingCodec::StreamingBrotli => { - if level < 0 { - error_to_ptr("Brotli requires compression level >= 0", error); - return std::ptr::null_mut(); - } - let compressor = brotli::make_write_compressor(vec![], Some(level as _)); - Box::into_raw(Box::new(compressor)) as _ - } - #[cfg(feature = "gzip")] - StreamingCodec::StreamingGzip => { - if level < 1 { - error_to_ptr("Gzip requires compression level >= 1", error); - return std::ptr::null_mut(); - } - let compressor = gzip::flate2::write::GzEncoder::new(vec![], gzip::flate2::Compression::new(level as _)); - Box::into_raw(Box::new(compressor)) as _ - } - #[cfg(feature = "zstd")] - StreamingCodec::StreamingZstd => { - let compressor = zstd::zstd::Encoder::new(vec![], level); - Box::into_raw(Box::new(compressor)) as _ - } - #[cfg(feature = "snappy")] - StreamingCodec::StreamingSnappy => { - let compressor = snappy::snap::write::FrameEncoder::new(vec![]); - Box::into_raw(Box::new(compressor)) as _ - } - #[cfg(feature = "lz4")] - StreamingCodec::StreamingLz4 => { - if level < 0 { - error_to_ptr("Lz4 requires compression level >= 0", error); - return std::ptr::null_mut(); - } - let compressor = lz4::make_write_compressor(vec![], Some(level as _)); - Box::into_raw(Box::new(compressor)) as _ - } - } -} - -#[no_mangle] -pub extern "C" fn free_compressor(codec: StreamingCodec, compressor_ptr: &mut *mut c_void) { - if !(*compressor_ptr).is_null() { - { - match codec { - #[cfg(feature = "bzip2")] - StreamingCodec::StreamingBzip2 => { - let _ = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; - } - #[cfg(feature = "brotli")] - StreamingCodec::StreamingBrotli => { - let _ = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; - } - #[cfg(feature = "gzip")] - StreamingCodec::StreamingGzip => { - let _ = unsafe { 
Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; - } - #[cfg(feature = "zstd")] - StreamingCodec::StreamingZstd => { - let _ = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; - } - #[cfg(feature = "snappy")] - StreamingCodec::StreamingSnappy => { - let _ = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; - } - #[cfg(feature = "lz4")] - StreamingCodec::StreamingLz4 => { - let _ = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; - } - } - } - *compressor_ptr = std::ptr::null_mut(); - } -} - -#[no_mangle] -pub extern "C" fn compressor_inner(codec: StreamingCodec, compressor_ptr: &mut *mut c_void) -> Buffer { - match codec { - #[cfg(feature = "bzip2")] - StreamingCodec::StreamingBzip2 => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; - let buffer = Buffer::from(compressor.get_ref()); - *compressor_ptr = Box::into_raw(compressor) as _; - buffer - } - #[cfg(feature = "brotli")] - StreamingCodec::StreamingBrotli => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; - let buffer = Buffer::from(compressor.get_ref()); - *compressor_ptr = Box::into_raw(compressor) as _; - buffer - } - #[cfg(feature = "gzip")] - StreamingCodec::StreamingGzip => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; - let buffer = Buffer::from(compressor.get_ref()); - *compressor_ptr = Box::into_raw(compressor) as _; - buffer - } - #[cfg(feature = "zstd")] - StreamingCodec::StreamingZstd => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; - let buffer = Buffer::from(compressor.get_ref()); - *compressor_ptr = Box::into_raw(compressor) as _; - buffer - } - #[cfg(feature = "snappy")] - StreamingCodec::StreamingSnappy => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; - let buffer = Buffer::from(compressor.get_ref()); - *compressor_ptr = 
Box::into_raw(compressor) as _; - buffer - } - #[cfg(feature = "lz4")] - StreamingCodec::StreamingLz4 => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; - let buffer = Buffer::from(compressor.writer()); - *compressor_ptr = Box::into_raw(compressor) as _; - buffer - } - } -} - -/// Finish the decompression stream and return the underlying buffer, transfering ownership to caller -#[no_mangle] -pub extern "C" fn compressor_finish( - codec: StreamingCodec, - compressor_ptr: &mut *mut c_void, - error: &mut *mut c_char, -) -> Buffer { - let buf = match codec { - #[cfg(feature = "bzip2")] - StreamingCodec::StreamingBzip2 => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; - match compressor.finish() { - Ok(buf) => Buffer::from(buf), - Err(err) => { - error_to_ptr(err, error); - Buffer::empty() - } - } - } - #[cfg(feature = "brotli")] - StreamingCodec::StreamingBrotli => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; - if let Err(err) = compressor.flush() { - error_to_ptr(err, error); - return Buffer::empty(); - } - Buffer::from(compressor.into_inner()) - } - #[cfg(feature = "gzip")] - StreamingCodec::StreamingGzip => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; - match compressor.finish() { - Ok(buf) => Buffer::from(buf), - Err(err) => { - error_to_ptr(err, error); - Buffer::empty() - } - } - } - #[cfg(feature = "zstd")] - StreamingCodec::StreamingZstd => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; - match compressor.finish() { - Ok(buf) => Buffer::from(buf), - Err(err) => { - error_to_ptr(err, error); - Buffer::empty() - } - } - } - #[cfg(feature = "snappy")] - StreamingCodec::StreamingSnappy => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; - match compressor.into_inner() { - Ok(buf) => Buffer::from(buf), - Err(err) => 
{ - error_to_ptr(err, error); - Buffer::empty() - } - } - } - #[cfg(feature = "lz4")] - StreamingCodec::StreamingLz4 => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; - let (w, ret) = compressor.finish(); - match ret { - Ok(_) => Buffer::from(w), - Err(err) => { - error_to_ptr(err, error); - Buffer::empty() - } - } - } - }; - *compressor_ptr = std::ptr::null_mut(); - buf -} - -#[no_mangle] -pub extern "C" fn compressor_flush(codec: StreamingCodec, compressor_ptr: &mut *mut c_void, error: &mut *mut c_char) { - match codec { - #[cfg(feature = "bzip2")] - StreamingCodec::StreamingBzip2 => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; - if let Err(err) = compressor.flush() { - error_to_ptr(err, error); - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "brotli")] - StreamingCodec::StreamingBrotli => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; - if let Err(err) = compressor.flush() { - error_to_ptr(err, error); - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "gzip")] - StreamingCodec::StreamingGzip => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; - if let Err(err) = compressor.flush() { - error_to_ptr(err, error); - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "zstd")] - StreamingCodec::StreamingZstd => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; - if let Err(err) = compressor.flush() { - error_to_ptr(err, error); - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "snappy")] - StreamingCodec::StreamingSnappy => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; - if let Err(err) = compressor.flush() { - error_to_ptr(err, error); - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - 
#[cfg(feature = "lz4")] - StreamingCodec::StreamingLz4 => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; - if let Err(err) = compressor.flush() { - error_to_ptr(err, error); - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - } -} - -#[no_mangle] -pub extern "C" fn compressor_compress( - codec: StreamingCodec, - compressor_ptr: &mut *mut c_void, - input: *const u8, - input_len: usize, - nbytes_read: &mut usize, - nbytes_written: &mut usize, - error: &mut *mut c_char, -) { - let mut decompressed = Cursor::new(unsafe { slice::from_raw_parts(input, input_len) }); - match codec { - #[cfg(feature = "bzip2")] - StreamingCodec::StreamingBzip2 => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; - match std::io::copy(&mut decompressed, &mut compressor) { - Ok(n) => { - *nbytes_written = n as _; - *nbytes_read = decompressed.position() as _; - } - Err(err) => { - error_to_ptr(err, error); - } - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "brotli")] - StreamingCodec::StreamingBrotli => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; - match std::io::copy(&mut decompressed, &mut compressor) { - Ok(n) => { - *nbytes_written = n as _; - *nbytes_read = decompressed.position() as _; - } - Err(err) => { - error_to_ptr(err, error); - } - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "gzip")] - StreamingCodec::StreamingGzip => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; - match std::io::copy(&mut decompressed, &mut compressor) { - Ok(n) => { - *nbytes_written = n as _; - *nbytes_read = decompressed.position() as _; - } - Err(err) => { - error_to_ptr(err, error); - } - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "zstd")] - StreamingCodec::StreamingZstd => { - let mut compressor = unsafe { 
Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; - match std::io::copy(&mut decompressed, &mut compressor) { - Ok(n) => { - *nbytes_written = n as _; - *nbytes_read = decompressed.position() as _; - } - Err(err) => { - error_to_ptr(err, error); - } - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "snappy")] - StreamingCodec::StreamingSnappy => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; - match std::io::copy(&mut decompressed, &mut compressor) { - Ok(n) => { - *nbytes_written = n as _; - *nbytes_read = decompressed.position() as _; - } - Err(err) => { - error_to_ptr(err, error); - } - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - #[cfg(feature = "lz4")] - StreamingCodec::StreamingLz4 => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; - match std::io::copy(&mut decompressed, &mut compressor) { - Ok(n) => { - *nbytes_written = n as _; - *nbytes_read = decompressed.position() as _; - } - Err(err) => { - error_to_ptr(err, error); - } - } - *compressor_ptr = Box::into_raw(compressor) as _; - } - } -} -#[no_mangle] -#[allow(unused_variables)] -pub extern "C" fn decompressor_init(codec: StreamingCodec) -> *mut c_void { - // for decompression, we really only need a buffer for storing output - // some streaming codecs, like snappy, don't have a write impl and only a - // read impl for decompressors - let buf: Vec = vec![]; - Box::into_raw(Box::new(Cursor::new(buf))) as _ -} - -#[no_mangle] -#[allow(unused_variables)] -pub extern "C" fn free_decompressor(codec: StreamingCodec, decompressor_ptr: &mut *mut c_void) { - if !(*decompressor_ptr).is_null() { - { - let _ = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; - } - *decompressor_ptr = std::ptr::null_mut(); - } -} - -#[no_mangle] -#[allow(unused_variables)] -pub extern "C" fn decompressor_inner(codec: StreamingCodec, decompressor_ptr: &mut *mut c_void) -> 
Buffer { - let decompressor = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; - let buf = Buffer::from(decompressor.get_ref()); - *decompressor_ptr = Box::into_raw(decompressor) as _; - buf -} - -/// Finish the decompression stream and return the underlying buffer, transfering ownership to caller -#[no_mangle] -#[allow(unused_variables)] -pub extern "C" fn decompressor_finish( - codec: StreamingCodec, - decompressor_ptr: &mut *mut c_void, - error: &mut *mut c_char, -) -> Buffer { - let mut cursor = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; - if let Err(err) = cursor.flush() { - error_to_ptr(err, error); - return Buffer::empty(); - }; - *decompressor_ptr = std::ptr::null_mut(); - Buffer::from(cursor.into_inner()) -} - -#[no_mangle] -#[allow(unused_variables)] -pub extern "C" fn decompressor_flush( - codec: StreamingCodec, - decompressor_ptr: &mut *mut c_void, - error: &mut *mut c_char, -) { - let mut cursor = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; - if let Err(err) = cursor.flush() { - error_to_ptr(err, error); - } - *decompressor_ptr = Box::into_raw(cursor) as _; -} - -#[no_mangle] -pub extern "C" fn decompressor_decompress( - codec: StreamingCodec, - decompressor_ptr: &mut *mut c_void, - input: *const u8, - input_len: usize, - nbytes_read: &mut usize, - nbytes_written: &mut usize, - error: &mut *mut c_char, -) { - let mut decompressed = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; - let start_pos = decompressed.position(); - let mut compressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); - let ret: Result = match codec { - #[cfg(feature = "bzip2")] - StreamingCodec::StreamingBzip2 => bzip2::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "gzip")] - StreamingCodec::StreamingGzip => gzip::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "brotli")] - StreamingCodec::StreamingBrotli => brotli::decompress(&mut 
compressed, &mut decompressed), - #[cfg(feature = "zstd")] - StreamingCodec::StreamingZstd => zstd::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "snappy")] - StreamingCodec::StreamingSnappy => snappy::decompress(&mut compressed, &mut decompressed), - #[cfg(feature = "lz4")] - StreamingCodec::StreamingLz4 => lz4::decompress(&mut compressed, &mut decompressed), - }; - match ret { - Ok(_) => { - *nbytes_read = compressed.position() as _; - *nbytes_written = (decompressed.position() - start_pos) as _; - } - Err(err) => { - error_to_ptr(err, error); - } - }; - *decompressor_ptr = Box::into_raw(decompressed) as _; -} - -/* -------- Codec specific functions ----------*/ -#[cfg(feature = "lz4")] -#[no_mangle] -pub extern "C" fn lz4_frame_max_compression_level() -> usize { - lz4::LZ4_ACCELERATION_MAX as _ -} - -#[cfg(feature = "lz4")] -#[no_mangle] -pub extern "C" fn lz4_frame_max_compressed_len(input_len: usize, compression_level: i32) -> usize { - lz4::compress_bound(input_len, Some(compression_level as _)) -} - -#[cfg(feature = "lz4")] -#[no_mangle] -#[allow(unused_variables)] -pub extern "C" fn lz4_block_max_compressed_len(input_len: usize, error: &mut *mut c_char) -> usize { - lz4::block::compress_bound(input_len, Some(true)) -} - -#[cfg(feature = "deflate")] -#[no_mangle] -pub extern "C" fn deflate_max_compressed_len(input_len: usize, level: i32) -> usize { - deflate::compress_bound(input_len, Some(level)) -} - -#[cfg(feature = "gzip")] -#[no_mangle] -pub extern "C" fn gzip_max_compressed_len(input_len: usize, level: i32) -> usize { - let level = if level < 0 { 0 } else { level }; - gzip::compress_bound(input_len, Some(level)).unwrap() -} - -#[cfg(feature = "zstd")] -#[no_mangle] -pub extern "C" fn zstd_max_compressed_len(input_len: usize) -> usize { - zstd::compress_bound(input_len) -} - -#[cfg(feature = "snappy")] -#[no_mangle] -pub extern "C" fn snappy_raw_max_compressed_len(input_len: usize) -> usize { - snap::raw::max_compress_len(input_len) 
-} - -#[cfg(feature = "brotli")] -#[no_mangle] -pub extern "C" fn brotli_max_compressed_len(input_len: usize) -> usize { - brotli::compress_bound(input_len) -} - -#[cfg(feature = "snappy")] -#[no_mangle] -pub extern "C" fn snappy_raw_decompressed_len(input: *const u8, input_len: usize, error: &mut *mut c_char) -> isize { - let input = unsafe { slice::from_raw_parts(input, input_len) }; - match snap::raw::decompress_len(input) { - Ok(n) => n as _, - Err(err) => { - error_to_ptr(err, error); - -1 - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - const DATA: &[u8; 5] = b"bytes"; - - #[cfg(feature = "lz4")] - #[test] - fn test_lz4_frame_max_compressed_len() { - // A known simple test case, expected len taken from lz4/lz4 repo - let len = lz4_frame_max_compressed_len(25, 4); - assert_eq!(len, 65544); - } - - #[cfg(feature = "lz4")] - #[test] - fn test_lz4_block_max_compressed_len() { - let mut error: *mut c_char = std::ptr::null_mut(); - let len = lz4_block_max_compressed_len(10, &mut error); - assert!(error.is_null()); - assert_eq!(len, 30); - } - - #[cfg(feature = "snappy")] - #[test] - fn test_snappy_raw_max_compressed_len() { - let len = snappy_raw_max_compressed_len(10); - assert_eq!(len, 43); - } - - #[cfg(feature = "snappy")] - #[test] - fn test_snappy_raw_decompressed_len() { - let uncompressed = b"bytes"; - let mut compressed = vec![0; snappy_raw_max_compressed_len(uncompressed.len())]; - let nbytes_written = snappy::raw::compress(uncompressed, &mut compressed).unwrap(); - - let mut error: *mut c_char = std::ptr::null_mut(); - let len = snappy_raw_decompressed_len(compressed.as_ptr(), nbytes_written, &mut error); - - assert!(error.is_null()); - assert_eq!(len as usize, uncompressed.len()); - } - - #[cfg(feature = "snappy")] - #[test] - fn test_snappy_roundtrip() { - let mut expected = vec![]; - snappy::compress(Cursor::new(DATA), &mut expected).unwrap(); - roundtrip(Codec::Snappy, &expected, 0); - } - #[cfg(feature = "snappy")] - #[test] - fn 
test_snappy_raw_roundtrip() { - let expected = snappy::raw::compress_vec(DATA).unwrap(); - roundtrip(Codec::SnappyRaw, &expected, 0); - } - #[cfg(feature = "lz4")] - #[test] - fn test_lz4_roundtrip() { - let mut expected = Cursor::new(vec![]); - lz4::compress(Cursor::new(DATA), &mut expected, Some(6)).unwrap(); - let expected = expected.into_inner(); - roundtrip(Codec::Lz4, &expected, 6); - } - #[cfg(feature = "lz4")] - #[test] - fn test_lz4_block_roundtrip() { - let expected = lz4::block::compress_vec(DATA, Some(6), Some(1), Some(true)).unwrap(); - roundtrip(Codec::Lz4Block, &expected, 6); - } - #[cfg(feature = "bzip2")] - #[test] - fn test_bzip2_roundtrip() { - let mut expected = Cursor::new(vec![]); - bzip2::compress(Cursor::new(DATA), &mut expected, Some(6)).unwrap(); - let expected = expected.into_inner(); - roundtrip(Codec::Bzip2, &expected, 6); - } - #[cfg(feature = "brotli")] - #[test] - fn test_brotli_roundtrip() { - let mut expected = Cursor::new(vec![]); - brotli::compress(Cursor::new(DATA), &mut expected, Some(6)).unwrap(); - let expected = expected.into_inner(); - roundtrip(Codec::Brotli, &expected, 6); - } - #[cfg(feature = "zstd")] - #[test] - fn test_zstd_roundtrip() { - let mut expected = Cursor::new(vec![]); - zstd::compress(Cursor::new(DATA), &mut expected, Some(6)).unwrap(); - let expected = expected.into_inner(); - roundtrip(Codec::Zstd, &expected, 6); - } - - fn roundtrip(codec: Codec, expected: &[u8], level: i32) { - let mut nbytes_read = 0; - let mut nbytes_written = 0; - let mut error = std::ptr::null_mut(); - let buffer = compress( - codec, - level, - DATA.as_ptr(), - DATA.len(), - &mut nbytes_read, - &mut nbytes_written, - &mut error, - ); - if !error.is_null() { - let error = unsafe { CString::from_raw(error) }; - panic!("Failed: {}", error.to_str().unwrap()); - } - assert_eq!(nbytes_read, DATA.len()); - assert_eq!(nbytes_written, buffer.len); - assert!(buffer.owned); - - // retrieve compressed data and compare to actual rust impl - let 
compressed = unsafe { Vec::from_raw_parts(buffer.data as *mut u8, buffer.len, buffer.len) }; - assert_eq!(&compressed, expected); - - // And decompress - nbytes_read = 0; - nbytes_written = 0; - - let buffer = decompress( - codec, - compressed.as_ptr(), - compressed.len(), - &mut nbytes_read, - &mut nbytes_written, - &mut error, - ); - if !error.is_null() { - let error = unsafe { CString::from_raw(error) }; - panic!("Failed: {}", error.to_str().unwrap()); - } - assert_eq!(nbytes_read, compressed.len()); - assert_eq!(nbytes_written, buffer.len); - assert_eq!(nbytes_written, DATA.len()); - assert!(buffer.owned); - let decompressed = unsafe { Vec::from_raw_parts(buffer.data as *mut u8, buffer.len, buffer.len) }; - assert_eq!(DATA.as_slice(), &decompressed); - } -} diff --git a/libcramjam/src/deflate.rs b/libcramjam/src/deflate.rs deleted file mode 100644 index c946b99f..00000000 --- a/libcramjam/src/deflate.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! deflate de/compression interface -pub use flate2; -use flate2::read::{DeflateDecoder, DeflateEncoder}; -use flate2::Compression; -use libdeflater; -use std::io::prelude::*; -use std::io::Error; - -const DEFAULT_COMPRESSION_LEVEL: u32 = 6; - -pub fn compress_bound(input_len: usize, level: Option) -> usize { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL as _); - let mut c = libdeflater::Compressor::new(libdeflater::CompressionLvl::new(level).unwrap()); - c.deflate_compress_bound(input_len) -} - -/// Decompress gzip data -#[inline(always)] -pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = DeflateDecoder::new(input); - let n_bytes = std::io::copy(&mut decoder, output)?; - Ok(n_bytes as usize) -} - -/// Compress gzip data -#[inline(always)] -pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - - let mut encoder = DeflateEncoder::new(input, Compression::new(level)); - let n_bytes = std::io::copy(&mut 
encoder, output)?; - Ok(n_bytes as usize) -} diff --git a/libcramjam/src/gzip.rs b/libcramjam/src/gzip.rs deleted file mode 100644 index 588c2546..00000000 --- a/libcramjam/src/gzip.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! gzip de/compression interface -pub use flate2; -use flate2::read::{GzEncoder, MultiGzDecoder}; -use flate2::Compression; -use std::io::prelude::*; -use std::io::{Cursor, Error}; - -const DEFAULT_COMPRESSION_LEVEL: u32 = 6; - -pub fn compress_bound(input_len: usize, level: Option) -> Result { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL as _); - let mut c = libdeflater::Compressor::new( - libdeflater::CompressionLvl::new(level) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("{:?}", e)))?, - ); - Ok(c.gzip_compress_bound(input_len)) -} - -/// Decompress gzip data -#[inline(always)] -pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = MultiGzDecoder::new(input); - let mut out = vec![]; - let n_bytes = decoder.read_to_end(&mut out)?; - std::io::copy(&mut Cursor::new(out.as_slice()), output)?; - Ok(n_bytes as usize) -} - -/// Compress gzip data -#[inline(always)] -pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - let mut encoder = GzEncoder::new(input, Compression::new(level)); - let n_bytes = std::io::copy(&mut encoder, output)?; - Ok(n_bytes as usize) -} - -#[cfg(test)] -mod tests { - - #[test] - fn test_gzip_multiple_streams() { - let mut out1 = vec![]; - let mut out2 = vec![]; - super::compress(b"foo".to_vec().as_slice(), &mut out1, None).unwrap(); - super::compress(b"bar".to_vec().as_slice(), &mut out2, None).unwrap(); - - let mut out3 = vec![]; - out1.extend_from_slice(&out2); - super::decompress(out1.as_slice(), &mut out3).unwrap(); - assert_eq!(out3, b"foobar".to_vec()); - } -} diff --git a/libcramjam/src/lib.rs b/libcramjam/src/lib.rs deleted file mode 100644 index 8581669e..00000000 --- 
a/libcramjam/src/lib.rs +++ /dev/null @@ -1,129 +0,0 @@ -#[cfg(feature = "blosc2")] -pub mod blosc2; -#[cfg(feature = "brotli")] -pub mod brotli; -#[cfg(feature = "bzip2")] -pub mod bzip2; -#[cfg(feature = "capi")] -mod capi; -#[cfg(feature = "deflate")] -pub mod deflate; -#[cfg(feature = "gzip")] -pub mod gzip; -#[cfg(feature = "lz4")] -pub mod lz4; -#[cfg(feature = "snappy")] -pub mod snappy; -#[cfg(feature = "xz")] -pub mod xz; -#[cfg(feature = "zstd")] -pub mod zstd; - -#[cfg(test)] -mod tests { - - use std::io::Cursor; - - // Default testing data - fn gen_data() -> Vec { - (0..1_000_000) - .map(|_| b"oh what a beautiful morning, oh what a beautiful day!!".to_vec()) - .flat_map(|v| v) - .collect() - } - - // Single test generation - macro_rules! round_trip { - ($name:ident($compress_output:ident -> $decompress_output:ident), variant=$variant:ident, compressed_len=$compressed_len:literal, $(, $args:ident)*) => { - #[test] - fn $name() { - let data = gen_data(); - - let mut compressed = Vec::new(); - - let compressed_size = if stringify!($decompress_output) == "Slice" { - compressed = (0..data.len()).map(|_| 0).collect::>(); - let mut cursor = Cursor::new(compressed.as_mut_slice()); - crate::$variant::compress(&mut Cursor::new(data.as_slice()), &mut cursor $(, $args)*).unwrap() - } else { - crate::$variant::compress(&mut Cursor::new(data.as_slice()), &mut Cursor::new(&mut compressed) $(, $args)*).unwrap() - }; - - assert_eq!(compressed_size, $compressed_len); - compressed.truncate(compressed_size); - - let mut decompressed = Vec::new(); - - let decompressed_size = if stringify!($decompress_output) == "Slice" { - decompressed = (0..data.len()).map(|_| 0).collect::>(); - let mut cursor = Cursor::new(decompressed.as_mut_slice()); - crate::$variant::decompress(&mut Cursor::new(&compressed), &mut cursor).unwrap() - } else { - crate::$variant::decompress(&mut Cursor::new(&compressed), &mut decompressed).unwrap() - }; - assert_eq!(decompressed_size, data.len()); - if 
&decompressed[..decompressed_size] != &data { - panic!("Decompressed and original data do not match! :-(") - } - } - } - } - - // macro to generate each variation of Output::* roundtrip. - macro_rules! test_variant { - ($variant:ident, compressed_len=$compressed_len:literal $(, $args:tt)*) => { - #[cfg(test)] - mod $variant { - use super::*; - round_trip!(roundtrip_compress_via_slice_decompress_via_slice(Slice -> Slice), variant=$variant, compressed_len=$compressed_len, $(, $args)* ); - round_trip!(roundtrip_compress_via_slice_decompress_via_vector(Slice -> Vector), variant=$variant, compressed_len=$compressed_len, $(, $args)* ); - round_trip!(roundtrip_compress_via_vector_decompress_via_slice(Vector -> Slice), variant=$variant, compressed_len=$compressed_len, $(, $args)* ); - round_trip!(roundtrip_compress_via_vector_decompress_via_vector(Vector -> Vector), variant=$variant, compressed_len=$compressed_len, $(, $args)* ); - } - } - } - - // Expected compressed_len, subsequent args are supplied to the variant's `compress` call. 
- #[cfg(feature = "snappy")] - test_variant!(snappy, compressed_len = 2_572_398); - - #[cfg(feature = "gzip")] - test_variant!(gzip, compressed_len = 157_192, None); - - #[cfg(feature = "brotli")] - test_variant!(brotli, compressed_len = 128, None); - - #[cfg(feature = "bzip2")] - test_variant!(bzip2, compressed_len = 14_207, None); - - #[cfg(feature = "deflate")] - test_variant!(deflate, compressed_len = 157_174, None); - - #[cfg(feature = "zstd")] - test_variant!(zstd, compressed_len = 4990, None); - - #[cfg(feature = "lz4")] - test_variant!(lz4, compressed_len = 303_278, None); - - #[cfg(feature = "blosc2")] - test_variant!(blosc2, compressed_len = 791_923); - - #[cfg(feature = "xz")] - #[allow(non_upper_case_globals)] - const format: Option = None; - - #[allow(non_upper_case_globals)] - #[cfg(feature = "xz")] - const check: Option = None; - - #[allow(non_upper_case_globals)] - #[cfg(feature = "xz")] - const filters: Option = None; - - #[allow(non_upper_case_globals)] - #[cfg(feature = "xz")] - const opts: Option = None; - - #[cfg(feature = "xz")] - test_variant!(xz, compressed_len = 8_020, None, format, check, filters, opts); -} diff --git a/libcramjam/src/lz4.rs b/libcramjam/src/lz4.rs deleted file mode 100644 index bd77fd4a..00000000 --- a/libcramjam/src/lz4.rs +++ /dev/null @@ -1,197 +0,0 @@ -//! 
lz4 de/compression interface -pub use lz4; -use std::io::{BufReader, Cursor, Error, Read, Write}; - -pub const DEFAULT_COMPRESSION_LEVEL: u32 = 4; -pub const LZ4_ACCELERATION_MAX: u32 = 65537; - -#[inline(always)] -pub fn make_write_compressor(output: W, level: Option) -> Result, Error> { - let comp = lz4::EncoderBuilder::new() - .level(level.unwrap_or(DEFAULT_COMPRESSION_LEVEL)) - .auto_flush(true) - .favor_dec_speed(true) - .build(output)?; - Ok(comp) -} - -/// Decompress lz4 data -#[inline(always)] -pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = lz4::Decoder::new(input)?; - let n_bytes = std::io::copy(&mut decoder, output)?; - decoder.finish().1?; - Ok(n_bytes as usize) -} - -#[inline(always)] -pub fn compress_bound(input_len: usize, level: Option) -> usize { - let mut prefs: std::mem::MaybeUninit = std::mem::MaybeUninit::zeroed(); - let prefs_ptr = prefs.as_mut_ptr(); - unsafe { - std::ptr::write( - std::ptr::addr_of_mut!((*prefs_ptr).compression_level), - level.unwrap_or(DEFAULT_COMPRESSION_LEVEL), - ) - }; - - let n = unsafe { lz4::liblz4::LZ4F_compressBound(input_len, prefs.as_ptr()) }; - unsafe { std::ptr::drop_in_place(std::ptr::addr_of_mut!((*prefs_ptr).compression_level)) }; - n -} - -/// Compress lz4 data -#[inline(always)] -pub fn compress(input: R, output: &mut W, level: Option) -> Result { - // Can add an additional constraint to `Seek` for output but that is not great for API - // so very unfortunately, we have an intermediate buffer to get bytes written to output - // as lz4::Encoder is Write only - let out_buffer = vec![]; - let mut encoder = make_write_compressor(out_buffer, level)?; - - // this returns, bytes read from uncompressed, input; we want bytes written - // but lz4 only implements Read for Encoder - let mut buf = BufReader::new(input); - std::io::copy(&mut buf, &mut encoder)?; - let (w, r) = encoder.finish(); - r?; - - // Now copy bytes from temp output buffer to actual output, returning number of bytes 
written to 'output'. - let nbytes = std::io::copy(&mut Cursor::new(w), output)?; - Ok(nbytes as _) -} - -pub mod block { - use lz4::block::CompressionMode; - use std::io::Error; - - const PREPEND_SIZE: bool = true; - - #[inline(always)] - pub fn compress_bound(input_len: usize, prepend_size: Option) -> usize { - match lz4::block::compress_bound(input_len) { - Ok(len) => { - if prepend_size.unwrap_or(true) { - len + 4 - } else { - len - } - } - Err(_) => 0, - } - } - - /// Decompress into Vec. Must have been compressed with prepended uncompressed size. - /// will panic otherwise. - #[inline(always)] - pub fn decompress_vec(input: &[u8]) -> Result, Error> { - if input.len() < 4 { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "Input not long enough", - )); - } - let bytes: [u8; 4] = input[..4].try_into().unwrap(); - let len = u32::from_le_bytes(bytes); - let mut buf = vec![0u8; len as usize]; - let nbytes = decompress_into(&input[4..], &mut buf, Some(false))?; - buf.truncate(nbytes); - Ok(buf) - } - - /// NOTE: input is expected to **not** have the size prepended. Calling decompress_into is - /// saying you already know the output buffer min size. `output` can be larger, but it cannot - /// be smaller than what's required. 
- #[inline(always)] - pub fn decompress_into(input: &[u8], output: &mut [u8], size_prepended: Option) -> Result { - let uncompressed_size = if size_prepended.is_some_and(|v| v) { - None // decompress_to_buffer will read from prepended size - } else { - Some(output.len() as _) - }; - let nbytes = lz4::block::decompress_to_buffer(input, uncompressed_size, output)?; - Ok(nbytes) - } - - #[inline(always)] - pub fn compress_vec( - input: &[u8], - level: Option, - acceleration: Option, - prepend_size: Option, - ) -> Result, Error> { - let len = compress_bound(input.len(), prepend_size); - let mut buffer = vec![0u8; len]; - let nbytes = compress_into(input, &mut buffer, level, acceleration, prepend_size)?; - buffer.truncate(nbytes); - Ok(buffer) - } - - #[inline(always)] - pub fn compress_into( - input: &[u8], - output: &mut [u8], - level: Option, - acceleration: Option, - prepend_size: Option, - ) -> Result { - let prepend_size = prepend_size.unwrap_or(PREPEND_SIZE); - let mode = compression_mode(None, level.map(|v| v as _), acceleration)?; - let nbytes = lz4::block::compress_to_buffer(input, Some(mode), prepend_size, output)?; - Ok(nbytes) - } - - #[inline] - fn compression_mode( - mode: Option<&str>, - compression: Option, - acceleration: Option, - ) -> Result { - let m = match mode { - Some(m) => match m { - "default" => CompressionMode::DEFAULT, - "fast" => CompressionMode::FAST(acceleration.unwrap_or(1)), - "high_compression" => CompressionMode::HIGHCOMPRESSION(compression.unwrap_or(9)), - _ => { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "Invalid compression string, needed one of 'default', 'fast', or 'high_compression'", - )) - } - }, - None => CompressionMode::DEFAULT, - }; - Ok(m) - } - - #[cfg(test)] - mod tests { - - use super::{compress_vec, decompress_into, decompress_vec}; - - const DATA: &[u8; 14] = b"howdy neighbor"; - - #[test] - fn round_trip_store_size() { - let compressed = compress_vec(DATA, None, None, 
Some(true)).unwrap(); - let decompressed = decompress_vec(&compressed).unwrap(); - assert_eq!(&decompressed, DATA); - } - #[test] - fn round_trip_no_store_size() { - let compressed = compress_vec(DATA, None, None, Some(false)).unwrap(); - - // decompressed_vec depends on prepended_size, so we can't use that. - assert!(decompress_vec(&compressed).is_err()); - - let mut decompressed = vec![0u8; DATA.len()]; - decompress_into(&compressed, &mut decompressed, Some(false)).unwrap(); - assert_eq!(&decompressed, DATA); - - // decompressed_into will allow a larger output buffer than what's needed - let mut decompressed = vec![0u8; DATA.len() + 5_000]; - let n = decompress_into(&compressed, &mut decompressed, Some(false)).unwrap(); - assert_eq!(&decompressed[..n], DATA); - } - } -} diff --git a/libcramjam/src/snappy.rs b/libcramjam/src/snappy.rs deleted file mode 100644 index 9911b097..00000000 --- a/libcramjam/src/snappy.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! snappy de/compression interface -pub use snap; -use snap::read::{FrameDecoder, FrameEncoder}; -use std::io; -use std::io::{Read, Result, Write}; - -/// Decompress snappy data framed -#[inline(always)] -pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = FrameDecoder::new(input); - let n_bytes = io::copy(&mut decoder, output)?; - Ok(n_bytes as usize) -} - -/// Decompress snappy data framed -#[inline(always)] -pub fn compress(data: R, output: &mut W) -> Result { - let mut encoder = FrameEncoder::new(data); - let n_bytes = io::copy(&mut encoder, output)?; - Ok(n_bytes as usize) -} - -pub mod raw { - use super::*; - - #[inline(always)] - pub fn compress_vec(input: &[u8]) -> Result> { - snap::raw::Encoder::new() - .compress_vec(input) - .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string())) - } - - #[inline(always)] - pub fn compress(input: &[u8], output: &mut [u8]) -> Result { - snap::raw::Encoder::new() - .compress(input, output) - .map_err(|e| io::Error::new(io::ErrorKind::Other, 
e.to_string())) - } - - #[inline(always)] - pub fn decompress_vec(input: &[u8]) -> Result> { - snap::raw::Decoder::new() - .decompress_vec(input) - .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string())) - } - - #[inline(always)] - pub fn decompress(input: &[u8], output: &mut [u8]) -> Result { - snap::raw::Decoder::new() - .decompress(input, output) - .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string())) - } -} diff --git a/libcramjam/src/xz.rs b/libcramjam/src/xz.rs deleted file mode 100644 index 87535f8a..00000000 --- a/libcramjam/src/xz.rs +++ /dev/null @@ -1,105 +0,0 @@ -//! LZMA / XZ de/compression interface -//! Note this is still a bit of a work in progress, especially when it comes -//! to filter chain support. -use std::io::{self, BufRead, BufReader}; -use std::io::{Read, Result, Write}; -pub use xz2; -use xz2::read::{XzDecoder, XzEncoder}; -use xz2::stream::{Check as xz2Check, Stream, TELL_ANY_CHECK}; -pub use xz2::stream::{Filters, LzmaOptions, MatchFinder, Mode}; - -/// Possible formats -#[derive(Clone, Debug, Copy)] -pub enum Format { - /// Auto select the format, for compression this is XZ, - /// for decompression it will be determined by the compressed input. - AUTO, - /// The `.xz` format (default) - XZ, - /// Legacy `.lzma` format. 
- ALONE, - /// Raw data stream - RAW, -} - -impl Default for Format { - fn default() -> Self { - Format::XZ - } -} - -/// Possible Check configurations -#[derive(Debug, Clone, Copy)] -pub enum Check { - Crc64, - Crc32, - Sha256, - None, -} - -impl Into for Check { - fn into(self) -> xz2Check { - match self { - Self::Crc64 => xz2Check::Crc64, - Self::Crc32 => xz2Check::Crc32, - Self::Sha256 => xz2Check::Sha256, - Self::None => xz2Check::None, - } - } -} - -/// Decompress snappy data framed -#[inline(always)] -pub fn decompress(input: R, output: &mut W) -> Result { - let xz_magicbytes = b"\xfd7zXZ\x00"; - let mut input = BufReader::new(input); - let stream = { - let innerbuf = input.fill_buf()?; - if innerbuf.len() >= xz_magicbytes.len() && &innerbuf[..xz_magicbytes.len()] == xz_magicbytes { - Stream::new_auto_decoder(u64::MAX, TELL_ANY_CHECK)? - } else { - Stream::new_lzma_decoder(u64::MAX)? - } - }; - let mut decoder = XzDecoder::new_stream(input, stream); - let n_bytes = io::copy(&mut decoder, output)?; - Ok(n_bytes as usize) -} - -/// Decompress snappy data framed -#[inline(always)] -pub fn compress( - data: R, - output: &mut W, - preset: Option, - format: Option>, - check: Option>, - filters: Option>, - options: Option>, -) -> Result { - let preset = preset.unwrap_or(6); // same as python default - let stream = match format.map(Into::into).unwrap_or_default() { - Format::AUTO | Format::XZ => { - let check = check.map(Into::into).unwrap_or(Check::Crc64); // default for xz - let stream = Stream::new_easy_encoder(preset, check.into())?; - stream - } - Format::ALONE => { - let opts = match options { - Some(opts) => opts.into(), - None => LzmaOptions::new_preset(preset)?, - }; - let stream = Stream::new_lzma_encoder(&opts)?; - stream - } - Format::RAW => { - let check = check.map(Into::into).unwrap_or(Check::None); // default for Alone and Raw formats - let filters = filters.map(Into::into).unwrap_or_else(|| Filters::new()); - let stream = 
Stream::new_stream_encoder(&filters, check.into())?; - stream - } - }; - let mut encoder = XzEncoder::new_stream(data, stream); - let n_bytes = io::copy(&mut encoder, output)?; - Ok(n_bytes as usize) -} diff --git a/libcramjam/src/zstd.rs b/libcramjam/src/zstd.rs deleted file mode 100644 index 631e48cd..00000000 --- a/libcramjam/src/zstd.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! zstd de/compression interface -use std::io::{Error, Read, Write}; -pub use zstd; - -const DEFAULT_COMPRESSION_LEVEL: i32 = 0; - -/// Get the max compressed length for a single pass -pub fn compress_bound(len: usize) -> usize { - zstd::zstd_safe::compress_bound(len) -} - -/// Decompress gzip data -#[inline(always)] -pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = zstd::stream::read::Decoder::new(input)?; - let n_bytes = std::io::copy(&mut decoder, output)?; - Ok(n_bytes as usize) -} - -/// Compress gzip data -#[inline(always)] -pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); // 0 will use zstd's default, currently 3 - let mut encoder = zstd::stream::read::Encoder::new(input, level)?; - let n_bytes = std::io::copy(&mut encoder, output)?; - Ok(n_bytes as usize) -} diff --git a/cramjam-python/mkdocs.yml b/mkdocs.yml similarity index 100% rename from cramjam-python/mkdocs.yml rename to mkdocs.yml diff --git a/cramjam-python/pyproject.toml b/pyproject.toml similarity index 100% rename from cramjam-python/pyproject.toml rename to pyproject.toml diff --git a/cramjam-python/src/blosc2.rs b/src/blosc2.rs similarity index 100% rename from cramjam-python/src/blosc2.rs rename to src/blosc2.rs diff --git a/cramjam-python/src/brotli.rs b/src/brotli.rs similarity index 100% rename from cramjam-python/src/brotli.rs rename to src/brotli.rs diff --git a/cramjam-python/src/bzip2.rs b/src/bzip2.rs similarity index 100% rename from cramjam-python/src/bzip2.rs rename to src/bzip2.rs diff --git 
a/cramjam-python/src/deflate.rs b/src/deflate.rs similarity index 100% rename from cramjam-python/src/deflate.rs rename to src/deflate.rs diff --git a/cramjam-python/src/exceptions.rs b/src/exceptions.rs similarity index 100% rename from cramjam-python/src/exceptions.rs rename to src/exceptions.rs diff --git a/cramjam-python/src/experimental.rs b/src/experimental.rs similarity index 100% rename from cramjam-python/src/experimental.rs rename to src/experimental.rs diff --git a/cramjam-python/src/gzip.rs b/src/gzip.rs similarity index 100% rename from cramjam-python/src/gzip.rs rename to src/gzip.rs diff --git a/cramjam-python/src/io.rs b/src/io.rs similarity index 100% rename from cramjam-python/src/io.rs rename to src/io.rs diff --git a/cramjam-python/src/lib.rs b/src/lib.rs similarity index 100% rename from cramjam-python/src/lib.rs rename to src/lib.rs diff --git a/cramjam-python/src/lz4.rs b/src/lz4.rs similarity index 100% rename from cramjam-python/src/lz4.rs rename to src/lz4.rs diff --git a/cramjam-python/src/snappy.rs b/src/snappy.rs similarity index 100% rename from cramjam-python/src/snappy.rs rename to src/snappy.rs diff --git a/cramjam-python/src/xz.rs b/src/xz.rs similarity index 100% rename from cramjam-python/src/xz.rs rename to src/xz.rs diff --git a/cramjam-python/src/zstd.rs b/src/zstd.rs similarity index 100% rename from cramjam-python/src/zstd.rs rename to src/zstd.rs diff --git a/cramjam-python/tests/__init__.py b/tests/__init__.py similarity index 100% rename from cramjam-python/tests/__init__.py rename to tests/__init__.py diff --git a/cramjam-python/tests/conftest.py b/tests/conftest.py similarity index 100% rename from cramjam-python/tests/conftest.py rename to tests/conftest.py diff --git a/cramjam-python/tests/data/integration/plaintext.txt b/tests/data/integration/plaintext.txt similarity index 100% rename from cramjam-python/tests/data/integration/plaintext.txt rename to tests/data/integration/plaintext.txt diff --git 
a/cramjam-python/tests/data/integration/plaintext.txt.br b/tests/data/integration/plaintext.txt.br similarity index 100% rename from cramjam-python/tests/data/integration/plaintext.txt.br rename to tests/data/integration/plaintext.txt.br diff --git a/cramjam-python/tests/data/integration/plaintext.txt.bz2 b/tests/data/integration/plaintext.txt.bz2 similarity index 100% rename from cramjam-python/tests/data/integration/plaintext.txt.bz2 rename to tests/data/integration/plaintext.txt.bz2 diff --git a/cramjam-python/tests/data/integration/plaintext.txt.gz b/tests/data/integration/plaintext.txt.gz similarity index 100% rename from cramjam-python/tests/data/integration/plaintext.txt.gz rename to tests/data/integration/plaintext.txt.gz diff --git a/cramjam-python/tests/data/integration/plaintext.txt.lz4 b/tests/data/integration/plaintext.txt.lz4 similarity index 100% rename from cramjam-python/tests/data/integration/plaintext.txt.lz4 rename to tests/data/integration/plaintext.txt.lz4 diff --git a/cramjam-python/tests/data/integration/plaintext.txt.lzma b/tests/data/integration/plaintext.txt.lzma similarity index 100% rename from cramjam-python/tests/data/integration/plaintext.txt.lzma rename to tests/data/integration/plaintext.txt.lzma diff --git a/cramjam-python/tests/data/integration/plaintext.txt.snappy b/tests/data/integration/plaintext.txt.snappy similarity index 100% rename from cramjam-python/tests/data/integration/plaintext.txt.snappy rename to tests/data/integration/plaintext.txt.snappy diff --git a/cramjam-python/tests/data/integration/plaintext.txt.zst b/tests/data/integration/plaintext.txt.zst similarity index 100% rename from cramjam-python/tests/data/integration/plaintext.txt.zst rename to tests/data/integration/plaintext.txt.zst diff --git a/cramjam-python/tests/test_blosc2.py b/tests/test_blosc2.py similarity index 100% rename from cramjam-python/tests/test_blosc2.py rename to tests/test_blosc2.py diff --git a/cramjam-python/tests/test_integration.py 
b/tests/test_integration.py similarity index 100% rename from cramjam-python/tests/test_integration.py rename to tests/test_integration.py diff --git a/cramjam-python/tests/test_rust_io.py b/tests/test_rust_io.py similarity index 100% rename from cramjam-python/tests/test_rust_io.py rename to tests/test_rust_io.py diff --git a/cramjam-python/tests/test_variants.py b/tests/test_variants.py similarity index 100% rename from cramjam-python/tests/test_variants.py rename to tests/test_variants.py