diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index a25f53f46..c868b18d2 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -17,6 +17,7 @@ on:
       - 'pytest.ini'
   release:
     types: [ published ]
+  workflow_dispatch: {} # Allow manual trigger
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -82,10 +83,12 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest]
         arch: [x86_64, aarch64]
-        cuda_version: ['12.1.0']
+        cuda_version: ["11.7.1", "11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2"]
         exclude:
           - os: windows-latest # This probably requires arm64 Windows agents
             arch: aarch64
+          - os: ubuntu-latest # Temporary. Takes too long, not ready yet.
+            arch: aarch64
     runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
     steps:
       # Check out code
@@ -121,6 +124,9 @@ jobs:
           set -ex
           build_os=${{ matrix.os }}
           build_arch=${{ matrix.arch }}
+          build_capability="50;52;60;61;70;75;80;86;89;90"
+          [[ "${{ matrix.cuda_version }}" == 11.7.* ]] && build_capability=${build_capability%??????}
+          [[ "${{ matrix.cuda_version }}" == 11.8.* ]] && build_capability=${build_capability%???}
           [[ "${{ matrix.os }}" = windows-* ]] && python3 -m pip install ninja
           for NO_CUBLASLT in ON OFF; do
             if [ ${build_os:0:6} == ubuntu ]; then
@@ -129,10 +135,10 @@ jobs:
               docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \
                 "apt-get update \
                 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
-                && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"50;52;60;61;70;75;80;86;89;90\" -DNO_CUBLASLT=${NO_CUBLASLT} . \
+                && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" -DNO_CUBLASLT=${NO_CUBLASLT} . \
                 && cmake --build ."
             else
-              cmake -G Ninja -DCOMPUTE_BACKEND=cuda -DNO_CUBLASLT=${NO_CUBLASLT} -DCMAKE_BUILD_TYPE=Release -S .
+              cmake -G Ninja -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY="${build_capability}" -DNO_CUBLASLT=${NO_CUBLASLT} -DCMAKE_BUILD_TYPE=Release -S .
               cmake --build . --config Release
             fi
           done
@@ -151,7 +157,10 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        # The specific Python version is irrelevant in this context as we are only packaging non-C extension
+        # code. This ensures compatibility across Python versions, including Python 3.8, as compatibility is
+        # dictated by the packaged code itself, not the Python version used for packaging.
+        python-version: ["3.10"]
         arch: [x86_64, aarch64]
         exclude:
           - os: windows-latest # This probably requires arm64 Windows agents
@@ -192,27 +201,15 @@ jobs:
       - name: Build wheel
         shell: bash
         run: python -m build .
+      - name: Determine and Set Platform Tag, then Tag Wheel
+        shell: bash
+        run: |
+          PLATFORM_TAG=$(python scripts/set_platform_tag.py ${{ matrix.arch }})
+          echo "PLATFORM_TAG=$PLATFORM_TAG"
+          wheel tags --remove --abi-tag=none --python-tag=py3 --platform-tag=$PLATFORM_TAG dist/bitsandbytes-*.whl
       - name: Upload build artifact
         uses: actions/upload-artifact@v4
         with:
-          name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.python-version }}
+          name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}
           path: dist/bitsandbytes-*.whl
           retention-days: 7
-  publish:
-    needs: build-wheels
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Download build artifact
-        uses: actions/download-artifact@v4
-        with:
-          path: dist/
-          merge-multiple: true
-          pattern: "bdist_wheel_*"
-      - run: |
-          ls -lR dist/
-      - name: Publish to PyPi
-        if: startsWith(github.ref, 'refs/tags')
-        uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          password: ${{ secrets.pypi }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 62ff4e535..be0d3555f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,14 @@ cmake_minimum_required(VERSION 3.22.1)
 
 project(bitsandbytes LANGUAGES CXX)
 
+# If run without specifying a build type, default to the Release configuration.
+# It optimizes the generated binaries for performance and also adds the `-DNDEBUG` flag,
+# which turns off a bunch of asserts which seem to link to new symbols in libstdc++,
+# worsening our manylinux compliance.
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release)
+endif()
+
 # Define included source files
 set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.cpp)
 set(CUDA_FILES csrc/ops.cu csrc/kernels.cu)
@@ -108,6 +116,7 @@ if(BUILD_CUDA)
     endif()
 
     string(APPEND CMAKE_CUDA_FLAGS " --use_fast_math")
+
     if(PTXAS_VERBOSE)
         # Verbose? Outputs register usage information, and other things...
         string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v")
@@ -220,4 +229,4 @@ if(MSVC)
     set_target_properties(bitsandbytes PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG "${PROJECT_SOURCE_DIR}/bitsandbytes")
 endif()
 
-set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY bitsandbytes)
+set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bitsandbytes")
diff --git a/scripts/set_platform_tag.py b/scripts/set_platform_tag.py
new file mode 100644
index 000000000..ca561c880
--- /dev/null
+++ b/scripts/set_platform_tag.py
@@ -0,0 +1,47 @@
+"""Print the wheel platform tag for the build host and target architecture."""
+
+import argparse
+import platform
+import sys
+
+
+def get_platform_tag(architecture):
+    """Return the wheel platform tag for this OS and the given architecture.
+
+    The operating system comes from ``platform.system()``. ``architecture`` must
+    be ``x86_64`` or ``aarch64`` (``arm64`` is accepted as an alias). The process
+    exits with an error message for any other architecture or operating system.
+    """
+    # Reject unknown values instead of silently tagging them as aarch64/arm64.
+    if architecture not in ("x86_64", "aarch64", "arm64"):
+        sys.exit(f"Unsupported architecture: {architecture}")
+
+    system = platform.system()
+
+    if system == "Linux":
+        tag = (
+            "manylinux_2_24_x86_64" if architecture == "x86_64" else "manylinux_2_24_aarch64"
+        )
+    elif system == "Darwin":
+        tag = "macosx_13_1_x86_64" if architecture == "x86_64" else "macosx_13_1_arm64"
+    elif system == "Windows":
+        tag = "win_amd64" if architecture == "x86_64" else "win_arm64"
+    else:
+        sys.exit(f"Unsupported system: {system}")
+
+    return tag
+
+
+def main():
+    """Parse the architecture argument and print the matching platform tag."""
+    parser = argparse.ArgumentParser(description="Determine platform tag.")
+    parser.add_argument("arch", type=str, help="Architecture (e.g., x86_64, aarch64)")
+    args = parser.parse_args()
+
+    tag = get_platform_tag(args.arch)
+
+    print(tag)  # This will be captured by the GitHub Actions workflow
+
+
+if __name__ == "__main__":
+    main()