Skip to content

Commit

Permalink
Build: Expand CUDA Toolkit Matrix (#1111)
Browse files Browse the repository at this point in the history
* (ci) build with wider CUDA version matrix

* (ci) build with wider CUDA version matrix

* (ci) skip sm_89 target on CUDA 11.7

* (ci) skip sm_90 target on CUDA 11.8

* modify workflow to publish to test.pypi

* (build) Test for manylinux_2_24 build on GH actions

* (build) got that backwards.

* try fixing manual triggering condition for testpypi

* try if Ubuntu 18.04 is an easy fix to allow for `manylinux_2_24` compatibility

* hardcode publish step to run to test publishing

* set ubuntu to newest supported version

* try statically linking libstdc++ to achieve manylinux_2_18

* last commit only brought us to manylinux_2_34, reverse

* add missing permission for publishing to pypi

* snake case deprecated in favor of kebab

* downgrade cuda ubuntu aiming for manylinux_2_24

* add step to upgrade cmake due to old Ubuntu for CUDA build

* adjust path to prefer pip installed cmake

* (cmake) set CMAKE_BUILD_TYPE=Release if unspecified

* default to CMAKE_BUILD_TYPE Release for optimized releases and better many_linux compatibility

* (build) back to ubuntu22.04 docker images

* verify CMake in separate step

* add clarifying comment about Python version compatibility

* (build) we don't need cmake for wheel step

* fixup testpypi publish to run in PR for testing

* add pypi publishing when tagged on main

* add functionality to rewrite platform tags

* (ci) adjust platform tags for wheels

* fix for windows, get order right.

* fix for windows, get order right.

* (build) slim down those fatbins on windows cuda

* sloppy

* remove broken PyPi upload for now

---------

Co-authored-by: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
  • Loading branch information
matthewdouglas and Titus-von-Koeller authored Mar 8, 2024
1 parent ac5d6ee commit 1cfc277
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 24 deletions.
43 changes: 20 additions & 23 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ on:
- 'pytest.ini'
release:
types: [ published ]
workflow_dispatch: {} # Allow manual trigger

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
Expand Down Expand Up @@ -82,10 +83,12 @@ jobs:
matrix:
os: [ubuntu-latest, windows-latest]
arch: [x86_64, aarch64]
cuda_version: ['12.1.0']
cuda_version: ["11.7.1", "11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2"]
exclude:
- os: windows-latest # This probably requires arm64 Windows agents
arch: aarch64
- os: ubuntu-latest # Temporary. Takes too long, not ready yet.
arch: aarch64
runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
steps:
# Check out code
Expand Down Expand Up @@ -121,6 +124,9 @@ jobs:
set -ex
build_os=${{ matrix.os }}
build_arch=${{ matrix.arch }}
build_capability="50;52;60;61;70;75;80;86;89;90"
[[ "${{ matrix.cuda_version }}" == 11.7.* ]] && build_capability=${build_capability%??????}
[[ "${{ matrix.cuda_version }}" == 11.8.* ]] && build_capability=${build_capability%???}
[[ "${{ matrix.os }}" = windows-* ]] && python3 -m pip install ninja
for NO_CUBLASLT in ON OFF; do
if [ ${build_os:0:6} == ubuntu ]; then
Expand All @@ -129,10 +135,10 @@ jobs:
docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \
"apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
&& cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"50;52;60;61;70;75;80;86;89;90\" -DNO_CUBLASLT=${NO_CUBLASLT} . \
&& cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \
&& cmake --build ."
else
cmake -G Ninja -DCOMPUTE_BACKEND=cuda -DNO_CUBLASLT=${NO_CUBLASLT} -DCMAKE_BUILD_TYPE=Release -S .
cmake -G Ninja -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY="${build_capability}" -DNO_CUBLASLT=${NO_CUBLASLT} -DCMAKE_BUILD_TYPE=Release -S .
cmake --build . --config Release
fi
done
Expand All @@ -151,7 +157,10 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.9", "3.10", "3.11", "3.12"]
# The specific Python version is irrelevant in this context as we are only packaging non-C extension
# code. This ensures compatibility across Python versions, including Python 3.8, as compatibility is
# dictated by the packaged code itself, not the Python version used for packaging.
python-version: ["3.10"]
arch: [x86_64, aarch64]
exclude:
- os: windows-latest # This probably requires arm64 Windows agents
Expand Down Expand Up @@ -192,27 +201,15 @@ jobs:
- name: Build wheel
shell: bash
run: python -m build .
- name: Determine and Set Platform Tag, then Tag Wheel
shell: bash
run: |
PLATFORM_TAG=$(python scripts/set_platform_tag.py ${{ matrix.arch }})
echo "PLATFORM_TAG=$PLATFORM_TAG"
wheel tags --remove --abi-tag=none --python-tag=py3 --platform-tag=$PLATFORM_TAG dist/bitsandbytes-*.whl
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.python-version }}
name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}
path: dist/bitsandbytes-*.whl
retention-days: 7
publish:
needs: build-wheels
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Download build artifact
uses: actions/download-artifact@v4
with:
path: dist/
merge-multiple: true
pattern: "bdist_wheel_*"
- run: |
ls -lR dist/
- name: Publish to PyPi
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.pypi }}
11 changes: 10 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ cmake_minimum_required(VERSION 3.22.1)

project(bitsandbytes LANGUAGES CXX)

# Default to a Release build when the user did not choose a configuration.
# Release enables optimizations and adds the `-DNDEBUG` flag, which turns off a
# number of asserts that seem to link to newer symbols in libstdc++ and thereby
# worsen our manylinux compliance.
#
# Multi-config generators (Visual Studio, Xcode, Ninja Multi-Config) ignore
# CMAKE_BUILD_TYPE and select the configuration at build time via `--config`,
# so the default is only applied to single-config generators.
get_property(bnb_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT bnb_is_multi_config AND NOT CMAKE_BUILD_TYPE)
    # FORCE is needed because project() has already created an empty cache entry,
    # which a plain cache set() would not overwrite. A value supplied by the user
    # makes the condition above false, so it is never overridden here.
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build configuration" FORCE)
    # Offer the standard configurations as a drop-down in cmake-gui / ccmake.
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS Debug Release RelWithDebInfo MinSizeRel)
endif()

# Define included source files
set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.cpp)
set(CUDA_FILES csrc/ops.cu csrc/kernels.cu)
Expand Down Expand Up @@ -108,6 +116,7 @@ if(BUILD_CUDA)
endif()

string(APPEND CMAKE_CUDA_FLAGS " --use_fast_math")

if(PTXAS_VERBOSE)
# Verbose? Outputs register usage information, and other things...
string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v")
Expand Down Expand Up @@ -220,4 +229,4 @@ if(MSVC)
set_target_properties(bitsandbytes PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG "${PROJECT_SOURCE_DIR}/bitsandbytes")
endif()

set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY bitsandbytes)
set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bitsandbytes")
34 changes: 34 additions & 0 deletions scripts/set_platform_tag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import argparse
import platform
import sys


def get_platform_tag(architecture):
    """Return the wheel platform tag for the host OS and the given architecture.

    The operating system is taken from ``platform.system()``; the architecture is
    supplied by the caller because the build may target a CPU other than the host's.

    Args:
        architecture: Target CPU architecture. ``"x86_64"`` and ``"aarch64"`` are
            supported; ``"arm64"`` is accepted as an alias for ``"aarch64"``.

    Returns:
        The platform tag string, e.g. ``"manylinux_2_24_x86_64"`` or ``"win_arm64"``.

    Raises:
        SystemExit: If the host system or the architecture is not supported.
    """
    tags_by_system = {
        "Linux": {
            "x86_64": "manylinux_2_24_x86_64",
            "aarch64": "manylinux_2_24_aarch64",
        },
        "Darwin": {
            "x86_64": "macosx_13_1_x86_64",
            "aarch64": "macosx_13_1_arm64",
        },
        "Windows": {
            "x86_64": "win_amd64",
            "aarch64": "win_arm64",
        },
    }

    system = platform.system()
    if system not in tags_by_system:
        sys.exit(f"Unsupported system: {system}")

    # Fail loudly on unknown architectures instead of silently labelling the wheel
    # as ARM, which would publish a mis-tagged artifact.
    normalized_arch = "aarch64" if architecture == "arm64" else architecture
    system_tags = tags_by_system[system]
    if normalized_arch not in system_tags:
        sys.exit(f"Unsupported architecture: {architecture}")

    return system_tags[normalized_arch]


def main():
    """Command-line entry point: print the platform tag for the requested architecture.

    Takes one positional argument, ``arch``, resolves it with ``get_platform_tag``
    and writes the resulting tag to standard output.
    """
    cli = argparse.ArgumentParser(description="Determine platform tag.")
    cli.add_argument("arch", type=str, help="Architecture (e.g., x86_64, aarch64)")
    requested_arch = cli.parse_args().arch

    # stdout is the interface here: the GitHub Actions workflow captures this line.
    print(get_platform_tag(requested_arch))


# Run the CLI only when the file is executed as a script.
if __name__ == "__main__":
    main()

0 comments on commit 1cfc277

Please sign in to comment.