Miscellaneous CI, dependency, and version fixes (#1151)

pytorch · Jul 9, 2024 · 37636a8 · 37636a8
1 parent 069b12b
commit 37636a8
Show file tree

Hide file tree

Showing 12 changed files with 104 additions and 40 deletions.
diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml
@@ -39,7 +39,7 @@ jobs:
         run: python -m pip install --upgrade pip
       - name: Install dependencies
         run: |
-          python -m pip install torch
+          python -m pip install torch torchvision
           python -m pip install -e .
           cd docs
           python -m pip install -r requirements.txt
@@ -108,21 +108,21 @@ jobs:
         run: |
           git remote set-url origin https://pytorchbot:${GITHUB_PYTORCHBOT_TOKEN}@github.com/pytorch/torchtune.git
           set -euo pipefail
-          
-          # Convert refs/tags/v1.12.0rc3 into 1.12. 
+
+          # Convert refs/tags/v1.12.0rc3 into 1.12.
           # Adopted from https://github.com/pytorch/pytorch/blob/main/.github/workflows/_docs.yml#L150C11-L155C13
-          GITHUB_REF=${{ github.ref }} 
+          GITHUB_REF=${{ github.ref }}
           if [[ "${GITHUB_REF}" =~ ^refs/tags/v([0-9]+\.[0-9]+)\.* ]]; then
             TARGET_FOLDER="${BASH_REMATCH[1]}"
           else
             TARGET_FOLDER="main"
           fi
- 
+
           echo "Target Folder: ${TARGET_FOLDER}"
           mkdir -p "${TARGET_FOLDER}"
           rm -rf "${TARGET_FOLDER}"/*
           mv docs/* "${TARGET_FOLDER}"
-          
+
           git config user.name 'pytorchbot'
           git config user.email 'soumith+bot@pytorch.org'
           git add "${TARGET_FOLDER}" || true

diff --git a/.github/workflows/recipe_test_multi_gpu.yaml → .github/workflows/gpu_test.yaml b/.github/workflows/recipe_test_multi_gpu.yaml → .github/workflows/gpu_test.yaml
@@ -1,4 +1,4 @@
-name: Multi-GPU Recipe Tests
+name: GPU tests
 
 on:
   push:
@@ -7,7 +7,7 @@ on:
   workflow_dispatch:
 
 concurrency:
-  group: recipe-test-multi-gpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  group: gpu-test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
   cancel-in-progress: true
 
 permissions:
@@ -19,7 +19,7 @@ defaults:
     shell: bash -l -eo pipefail {0}
 
 jobs:
-  recipe_test_multi_gpu:
+  gpu_test:
     runs-on: linux.8xlarge.nvidia.gpu
     strategy:
       matrix:
@@ -39,15 +39,15 @@ jobs:
         run: python -m pip install --upgrade pip
       - name: Install torch nightly
         if: ${{ matrix.torch-version == 'nightly' }}
-        run: python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu118
+        run: python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu118
       - name: Install torch stable
         if: ${{ matrix.torch-version == 'stable' }}
-        run: python -m pip install torch
+        run: python -m pip install torch torchvision
       - name: Install remaining dependencies
         run: |
           python -m pip install -e ".[dev]"
           python -m pip install lm-eval==0.4.*
-      - name: Run recipe tests with coverage
-        run: pytest tests -m integration_test --cov=. --cov-report=xml --durations=20 -vv
+      - name: Run recipe and unit tests with coverage
+        run: pytest tests --with-integration --cov=. --cov-report=xml --durations=20 -vv
       - name: Upload Coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/.github/workflows/recipe_test.yaml b/.github/workflows/recipe_test.yaml
@@ -39,7 +39,7 @@ jobs:
         run: python -m pip install --upgrade pip
       - name: Install dependencies
         run: |
-          python -m pip install torch
+          python -m pip install torch torchvision
           python -m pip install -e ".[dev]"
           python -m pip install lm-eval==0.4.*
       - name: Run recipe tests with coverage

diff --git a/.github/workflows/recipe_test_nightly.yaml b/.github/workflows/recipe_test_nightly.yaml
@@ -4,6 +4,7 @@ on:
   schedule:
     # Runs at midnight every day
     - cron:  '0 0 * * *'
+  workflow_dispatch:
 
 concurrency:
   group: recipe-test-nightly-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
@@ -38,14 +39,17 @@ jobs:
         run: python -m pip install --upgrade pip
       - name: Install torch nightly
         if: ${{ matrix.torch-version == 'nightly' }}
-        run: python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu118
+        run: python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
       - name: Install torch stable
         if: ${{ matrix.torch-version == 'stable' }}
-        run: python -m pip install torch
+        run: python -m pip install torch torchvision
       - name: Install remaining dependencies
         run: |
           python -m pip install -e ".[dev]"
           python -m pip install lm-eval==0.4.*
+      - name: Install torchao nightly
+        if: ${{ matrix.torch-version == 'nightly' }}
+        # Invoke pip through the interpreter, like the other install steps in this
+        # workflow, so torchao is installed into the same environment as torch.
+        run: python -m pip install --pre torchao-nightly --index-url https://download.pytorch.org/whl/nightly/cu121
       - name: Run recipe tests with coverage
         run: pytest tests -m integration_test --cov=. --cov-report=xml --durations=20 -vv
       - name: Upload Coverage to Codecov

diff --git a/.github/workflows/regression_test.yaml b/.github/workflows/regression_test.yaml
@@ -47,10 +47,10 @@ jobs:
           python3 -m pip install awscli==1.32.6
       - name: Install torch nightly
         if: ${{ matrix.torch-version == 'nightly' }}
-        run: python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu118
+        run: python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu118
       - name: Install torch stable
         if: ${{ matrix.torch-version == 'stable' }}
-        run: python -m pip install torch
+        run: python -m pip install torch torchvision
       - name: Install remaining dependencies
         run: |
           python -m pip install -e ".[dev]"

diff --git a/.github/workflows/unit_test.yaml b/.github/workflows/unit_test.yaml
@@ -33,7 +33,7 @@ jobs:
         run: python -m pip install --upgrade pip
       - name: Install dependencies
         run: |
-          python -m pip install torch
+          python -m pip install torch torchvision
           python -m pip install -e ".[dev]"
       - name: Run unit tests with coverage
         run: pytest tests --cov=. --cov-report=xml --durations=20 -vv

diff --git a/README.md b/README.md
@@ -156,7 +156,15 @@ You can find a full list of all our Llama3 configs [here.](recipes/configs/llama
 
 ## Installation
 
-**Step 1:** [Install PyTorch](https://pytorch.org/get-started/locally/). torchtune is tested with the latest stable PyTorch release as well as the preview nightly version.
+**Step 1:** [Install PyTorch](https://pytorch.org/get-started/locally/). torchtune is tested with the latest stable PyTorch release as well as the preview nightly version. For fine-tuning the multimodal LLMs available in the repo, you'll need to install torchvision as well.
+
+```
+# Install stable version of PyTorch using pip
+pip install torch torchvision
+
+# Nightly install for latest features
+pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
+```
 
 **Step 2:** The latest stable version of torchtune is hosted on PyPI and can be downloaded with the following command:
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -10,8 +10,6 @@ authors = [
 ]
 keywords = ["pytorch", "finetuning", "llm"]
 dependencies = [
-    # multimodality
-    "torchvision",
 
     # Hugging Face integrations
     "datasets",

diff --git a/tests/torchtune/utils/test_distributed.py b/tests/torchtune/utils/test_distributed.py
@@ -262,6 +262,10 @@ def world_size(self) -> int:
         return 2
 
     @gpu_test(gpu_count=2)
+    @pytest.mark.skipif(
+        version.parse(torch.__version__).base_version < "2.4.0",
+        reason="torch >= 2.4 required",
+    )
     def test_lora_state_dict(self):
         rank = self.rank
         is_rank_zero = rank == 0

diff --git a/torchtune/modules/low_precision/_register_nf4_dispatch_ops.py b/torchtune/modules/low_precision/_register_nf4_dispatch_ops.py
@@ -4,14 +4,9 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from importlib.metadata import PackageNotFoundError, version
-
 import torch
 from torchao.dtypes.nf4tensor import implements as nf4_tensor_impl, to_nf4
-
-
-def is_fbcode():
-    return not hasattr(torch.version, "git_version")
+from torchtune.modules.low_precision._utils import _get_torchao_version
 
 
 @nf4_tensor_impl([torch.ops.aten.clone.default])
@@ -26,17 +21,12 @@ def clone(func, *args, **kwargs):
 
 
 should_define_inplace_copy = True
-if not is_fbcode():
-    try:
-        ao_version = version("torchao")
-        should_define_inplace_copy = ao_version < "0.2.0"
-    # For importlib metadata, need to check nightly separately
-    except PackageNotFoundError:
-        ao_version = version("torchao-nightly")
-        should_define_inplace_copy = ao_version < "2024.5.20"
-    except Exception as e:
-        raise PackageNotFoundError("Could not find torchao version") from e
-
+ao_version, is_nightly = _get_torchao_version()
+
+
+def _release_tuple(ver):
+    """Return the leading numeric components of ``ver`` as a tuple of ints.
+
+    A local-version suffix (e.g. "+cu121") is dropped and parsing stops at the
+    first dot-separated component that has no leading digit, so
+    "0.10.0" -> (0, 10, 0) and "2024.5.20+cu121" -> (2024, 5, 20).
+    """
+    release = []
+    for piece in ver.split("+")[0].split("."):
+        # Keep only the leading digits of the component ("0rc1" -> "0").
+        lead = piece[: len(piece) - len(piece.lstrip("0123456789"))]
+        if not lead:
+            break
+        release.append(int(lead))
+    return tuple(release)
+
+
+# A falsy ``ao_version`` leaves ``should_define_inplace_copy`` at its default.
+if ao_version:
+    # Compare numerically: plain string comparison is lexicographic and would
+    # rank "0.10.0" below "0.2.0" (and "2024.10.1" below "2024.5.20").
+    min_release = (2024, 5, 20) if is_nightly else (0, 2, 0)
+    if _release_tuple(ao_version) >= min_release:
+        should_define_inplace_copy = False
 
 if should_define_inplace_copy:
     # TorchAO has `NF4.copy_` starting from `0.2.0`

diff --git a/torchtune/modules/low_precision/_utils.py b/torchtune/modules/low_precision/_utils.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from importlib.metadata import PackageNotFoundError, version
+from typing import Optional, Tuple
+
+import torch
+
+import torchao
+
+
+def _is_fbcode():
+    """Report whether ``torch.version`` lacks a ``git_version`` attribute.
+
+    NOTE(review): presumably this identifies Meta-internal (fbcode) builds of
+    torch -- confirm.
+    """
+    has_git_version = hasattr(torch.version, "git_version")
+    return not has_git_version
+
+
+def _get_torchao_version() -> Tuple[Optional[str], Optional[bool]]:
+    """
+    Get the installed torchao version.
+
+    Returns:
+        Tuple[Optional[str], Optional[bool]]: ``(version, is_nightly)``, where
+        ``version`` is the version string and ``is_nightly`` tells whether it was
+        read from the ``torchao-nightly`` distribution. Returns ``(None, None)``
+        when ``_is_fbcode()`` is true.
+
+    Lookup order:
+        1) ``_is_fbcode()``: skip detection entirely, then
+        2) importlib's ``version("torchao-nightly")`` for nightlies, then
+        3) ``torchao.__version__`` (only defined for torchao >= 0.3.0), then
+        4) importlib's ``version("torchao")`` for non-nightly installs.
+
+    Raises:
+        PackageNotFoundError: If step 4 is reached and fails for any reason.
+    """
+    if _is_fbcode():
+        return None, None
+    # Check for nightly install first
+    try:
+        ao_version = version("torchao-nightly")
+        is_nightly = True
+    except PackageNotFoundError:
+        # No nightly distribution: fall back to the attribute on the imported module.
+        try:
+            ao_version = torchao.__version__
+            is_nightly = False
+        except AttributeError:
+            # Sentinel consumed by the check below; ``is_nightly`` is assigned there.
+            ao_version = "unknown"
+    if ao_version == "unknown":
+        # Last resort: distribution metadata of the non-nightly package.
+        try:
+            ao_version = version("torchao")
+            is_nightly = False
+        except Exception as e:
+            raise PackageNotFoundError("Could not find torchao version") from e
+    return ao_version, is_nightly
diff --git a/torchtune/utils/quantization.py b/torchtune/utils/quantization.py
@@ -10,10 +10,17 @@
 from torchao.quantization.quant_api import (
     Int4WeightOnlyGPTQQuantizer,
     Int4WeightOnlyQuantizer,
-    quantize,
     Quantizer,
 )
 
+from torchtune.modules.low_precision._utils import _get_torchao_version
+
+ao_version, is_nightly = _get_torchao_version()
+
+
+def _release_tuple(ver):
+    """Return the leading numeric components of ``ver`` as a tuple of ints.
+
+    A local-version suffix (e.g. "+cu121") is dropped and parsing stops at the
+    first dot-separated component that has no leading digit, so
+    "2024.10.1+cu121" -> (2024, 10, 1).
+    """
+    release = []
+    for piece in ver.split("+")[0].split("."):
+        # Keep only the leading digits of the component ("3rc1" -> "3").
+        lead = piece[: len(piece) - len(piece.lstrip("0123456789"))]
+        if not lead:
+            break
+        release.append(int(lead))
+    return tuple(release)
+
+
+# Nightlies dated 2024.7.3 or later import `quantize_` under the `quantize` name.
+# The date is compared numerically: plain string comparison is lexicographic and
+# would rank "2024.10.1" below "2024.7.3". ``is_nightly`` short-circuits first, so
+# a ``None`` version (fbcode) is never parsed.
+if is_nightly and _release_tuple(ao_version) >= (2024, 7, 3):
+    from torchao.quantization.quant_api import quantize_ as quantize
+else:
+    from torchao.quantization.quant_api import quantize
+
 # importing TORCH_VERSION_AFTER_2_3 because `Int8DynActInt4WeightQuantizer`
 # is only available after 2.3 so we have to guard the pytorch versions to decide
 # the list of supported quantizers