Skip to content

Commit

Permalink
Merge branch 'main' of github.com:pytorch/torchcodec into mac_wheels_ci
Browse files Browse the repository at this point in the history
  • Loading branch information
scotts committed Oct 23, 2024
2 parents a889d52 + bb29228 commit cece065
Show file tree
Hide file tree
Showing 11 changed files with 285 additions and 8 deletions.
144 changes: 144 additions & 0 deletions .github/workflows/linux_cuda_wheel.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Build Linux CUDA wheels and run the torchcodec test suite against them on a
# GPU runner. (Indentation reconstructed: the scraped copy had lost all YAML
# nesting, which made the document invalid.)
name: Build and test Linux CUDA wheels

on:
  pull_request:
  push:
    branches:
      - nightly
      - main
      - release/*
    tags:
      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
  workflow_dispatch:

# One run per PR/branch; new pushes cancel in-flight runs for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: write

defaults:
  run:
    shell: bash -l -eo pipefail {0}

jobs:
  generate-matrix:
    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
    with:
      package-type: wheel
      os: linux
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      with-cpu: disable
      with-xpu: disable
      with-rocm: disable
      with-cuda: enable
      build-python-only: "disable"

  build:
    needs: generate-matrix
    strategy:
      fail-fast: false
    name: Build and Upload wheel
    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
    with:
      repository: pytorch/torchcodec
      ref: ""
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
      post-script: packaging/post_build_script.sh
      smoke-test-script: packaging/fake_smoke_test.py
      package-name: torchcodec
      trigger-event: ${{ github.event_name }}
      build-platform: "python-build-package"
      build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 ENABLE_CUDA=1 python -m build --wheel -vvv --no-isolation"

  install-and-test:
    runs-on: linux.4xlarge.nvidia.gpu
    strategy:
      fail-fast: false
      matrix:
        # 3.9 corresponds to the minimum python version for which we build
        # the wheel unless the label cliflow/binaries/all is present in the
        # PR.
        # For the actual release we should add that label and change this to
        # include more python versions.
        python-version: ['3.9']
        cuda-version: ['11.8', '12.1', '12.4']
        ffmpeg-version-for-tests: ['5', '6', '7']
    container:
      image: "pytorch/manylinux-builder:cuda${{ matrix.cuda-version }}"
      options: "--gpus all -e NVIDIA_DRIVER_CAPABILITIES=video,compute,utility"
    # NOTE(review): always() starts this job even when `build` failed or was
    # cancelled; the artifact download below would then fail. Confirm this is
    # intended rather than plain `needs: build` gating.
    if: ${{ always() }}
    needs: build
    steps:
      - name: Setup env vars
        run: |
          cuda_version_without_periods=$(echo "${{ matrix.cuda-version }}" | sed 's/\.//g')
          echo cuda_version_without_periods=${cuda_version_without_periods} >> $GITHUB_ENV
      - uses: actions/download-artifact@v3
        with:
          name: pytorch_torchcodec__3.9_cu${{ env.cuda_version_without_periods }}_x86_64
          path: pytorch/torchcodec/dist/
      - name: Setup miniconda using test-infra
        uses: ahmadsharif1/test-infra/.github/actions/setup-miniconda@14bc3c29f88d13b0237ab4ddf00aa409e45ade40
        with:
          python-version: ${{ matrix.python-version }}
          default-packages: "conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }}"
      - name: Check env
        run: |
          ${CONDA_RUN} env
          ${CONDA_RUN} conda info
          ${CONDA_RUN} nvidia-smi
      - name: Update pip
        run: ${CONDA_RUN} python -m pip install --upgrade pip
      - name: Install PyTorch
        run: |
          ${CONDA_RUN} python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu${{ env.cuda_version_without_periods }}
          ${CONDA_RUN} python -c 'import torch; print(f"{torch.__version__}"); print(f"{torch.__file__}"); print(f"{torch.cuda.is_available()=}")'
      - name: Install torchcodec from the wheel
        run: |
          wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
          echo Installing $wheel_path
          ${CONDA_RUN} python -m pip install $wheel_path -vvv
      - name: Check out repo
        uses: actions/checkout@v3
      - name: Install cuda runtime dependencies
        run: |
          # For some reason nvidia::libnpp=12.4 doesn't install but nvidia/label/cuda-12.4.0::libnpp does.
          # So we use the latter convention for libnpp.
          ${CONDA_RUN} conda install --yes nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }}
      - name: Install test dependencies
        run: |
          ${CONDA_RUN} python -m pip install --pre torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
          # Ideally we would find a way to get those dependencies from pyproject.toml
          ${CONDA_RUN} python -m pip install numpy pytest pillow
      - name: Delete the src/ folder just for fun
        run: |
          # The only reason we checked-out the repo is to get access to the
          # tests. We don't care about the rest. Out of precaution, we delete
          # the src/ folder to be extra sure that we're running the code from
          # the installed wheel rather than from the source.
          # This is just to be extra cautious and very overkill because a)
          # there's no way the `torchcodec` package from src/ can be found from
          # the PythonPath: the main point of `src/` is precisely to protect
          # against that and b) if we ever were to execute code from
          # `src/torchcodec`, it would fail loudly because the built .so files
          # aren't present there.
          rm -r src/
          ls
      - name: Smoke test
        run: |
          ${CONDA_RUN} python test/decoders/manual_smoke_test.py
      - name: Run Python tests
        run: |
          # We skip test_get_ffmpeg_version because it may not have a micro version.
          ${CONDA_RUN} FAIL_WITHOUT_CUDA=1 pytest test -k "not test_get_ffmpeg_version" -vvv
      - name: Run Python benchmark
        run: |
          ${CONDA_RUN} time python benchmarks/decoders/gpu_benchmark.py --devices=cuda:0,cpu --resize_devices=none
9 changes: 8 additions & 1 deletion packaging/post_build_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@ else
exit 1
fi

for ffmpeg_major_version in 4 5 6 7; do
# Verify the wheel ships one shared library per supported FFmpeg major
# version, and no unversioned libtorchcodec.
ffmpeg_versions=(4 5 6 7)

# TODO: Make ffmpeg4 work with nvcc.
# Default ENABLE_CUDA to 0: with the variable unset, `[ "" -eq 1 ]` is an
# error under `-e` rather than a false condition.
if [ "${ENABLE_CUDA:-0}" -eq 1 ]; then
    ffmpeg_versions=(5 6 7)
fi

# BUG FIX: this previously iterated over ${ffmepg_versions[@]} (typo), which
# expands to nothing, so the loop body never ran and no library was checked.
for ffmpeg_major_version in "${ffmpeg_versions[@]}"; do
    assert_in_wheel $wheel_path torchcodec/libtorchcodec${ffmpeg_major_version}.${ext}
done
assert_not_in_wheel $wheel_path libtorchcodec.${ext}
Expand Down
15 changes: 10 additions & 5 deletions src/torchcodec/decoders/_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ function(make_torchcodec_library library_name ffmpeg_target)
set(NEEDED_LIBRARIES ${ffmpeg_target} ${TORCH_LIBRARIES}
${Python3_LIBRARIES})
if(ENABLE_CUDA)
list(APPEND NEEDED_LIBRARIES ${CUDA_CUDA_LIBRARY}
list(APPEND NEEDED_LIBRARIES
${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} )
endif()
target_link_libraries(
Expand Down Expand Up @@ -76,10 +76,15 @@ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
)

make_torchcodec_library(libtorchcodec4 ffmpeg4)
make_torchcodec_library(libtorchcodec5 ffmpeg5)
make_torchcodec_library(libtorchcodec6 ffmpeg6)
make_torchcodec_library(libtorchcodec7 ffmpeg7)

if(NOT ENABLE_CUDA)
# TODO: Enable more ffmpeg versions for cuda.
make_torchcodec_library(libtorchcodec4 ffmpeg4)
endif()
make_torchcodec_library(libtorchcodec7 ffmpeg7)
make_torchcodec_library(libtorchcodec6 ffmpeg6)
make_torchcodec_library(libtorchcodec5 ffmpeg5)

else()
message(
STATUS
Expand Down
47 changes: 47 additions & 0 deletions src/torchcodec/decoders/_core/VideoDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1073,6 +1073,53 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtIndices(
return output;
}

// Returns, as a batched output, the frames displayed at the given timestamps
// (in seconds) of the given stream. Each timestamp t is mapped to the frame
// whose display interval contains t. Requires a prior full-stream scan.
// Throws (TORCH_CHECK) if a timestamp is outside [minPts, maxPts).
VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesDisplayedByTimestamps(
    int streamIndex,
    const std::vector<double>& timestamps) {
  validateUserProvidedStreamIndex(streamIndex);
  validateScannedAllStreams("getFramesDisplayedByTimestamps");

  // The frame displayed at timestamp t and the one displayed at timestamp `t +
  // eps` are probably the same frame, with the same index. The easiest way to
  // avoid decoding that unique frame twice is to convert the input timestamps
  // to indices, and leverage the de-duplication logic of getFramesAtIndices.
  // This means this function requires a scan.
  // TODO: longer term, we should implement this without requiring a scan

  const auto& streamMetadata = containerMetadata_.streams[streamIndex];
  const auto& stream = streams_[streamIndex];
  double minSeconds = streamMetadata.minPtsSecondsFromScan.value();
  double maxSeconds = streamMetadata.maxPtsSecondsFromScan.value();

  std::vector<int64_t> frameIndices(timestamps.size());
  // size_t index: `auto i = 0` deduces int and triggers a signed/unsigned
  // comparison warning against timestamps.size().
  for (size_t i = 0; i < timestamps.size(); ++i) {
    auto framePts = timestamps[i];
    TORCH_CHECK(
        framePts >= minSeconds && framePts < maxSeconds,
        "frame pts is " + std::to_string(framePts) + "; must be in range [" +
            std::to_string(minSeconds) + ", " + std::to_string(maxSeconds) +
            ").");

    // First frame whose *next* frame's pts is strictly greater than framePts:
    // that frame's display interval contains framePts.
    auto it = std::lower_bound(
        stream.allFrames.begin(),
        stream.allFrames.end(),
        framePts,
        [&stream](const FrameInfo& info, double framePts) {
          return ptsToSeconds(info.nextPts, stream.timeBase) <= framePts;
        });
    int64_t frameIndex = it - stream.allFrames.begin();
    // If the frame index is larger than the size of allFrames, that means we
    // couldn't match the pts value to the pts value of a NEXT FRAME. And
    // that means that this timestamp falls during the time between when the
    // last frame is displayed, and the video ends. Hence, it should map to the
    // index of the last frame.
    frameIndex = std::min(frameIndex, (int64_t)stream.allFrames.size() - 1);
    frameIndices[i] = frameIndex;
  }

  return getFramesAtIndices(streamIndex, frameIndices);
}

VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesInRange(
int streamIndex,
int64_t start,
Expand Down
6 changes: 6 additions & 0 deletions src/torchcodec/decoders/_core/VideoDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ class VideoDecoder {
// i.e. it will be returned when this function is called with seconds=5.0 or
// seconds=5.999, etc.
DecodedOutput getFrameDisplayedAtTimestampNoDemux(double seconds);

DecodedOutput getFrameAtIndex(
int streamIndex,
int64_t frameIndex,
Expand All @@ -242,6 +243,11 @@ class VideoDecoder {
BatchDecodedOutput getFramesAtIndices(
int streamIndex,
const std::vector<int64_t>& frameIndices);

BatchDecodedOutput getFramesDisplayedByTimestamps(
int streamIndex,
const std::vector<double>& timestamps);

// Returns frames within a given range for a given stream as a single stacked
// Tensor. The range is defined by [start, stop). The values retrieved from
// the range are:
Expand Down
13 changes: 13 additions & 0 deletions src/torchcodec/decoders/_core/VideoDecoderOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ TORCH_LIBRARY(torchcodec_ns, m) {
"get_frames_in_range(Tensor(a!) decoder, *, int stream_index, int start, int stop, int? step=None) -> (Tensor, Tensor, Tensor)");
m.def(
"get_frames_by_pts_in_range(Tensor(a!) decoder, *, int stream_index, float start_seconds, float stop_seconds) -> (Tensor, Tensor, Tensor)");
m.def(
"get_frames_by_pts(Tensor(a!) decoder, *, int stream_index, float[] timestamps) -> (Tensor, Tensor, Tensor)");
m.def("get_json_metadata(Tensor(a!) decoder) -> str");
m.def("get_container_json_metadata(Tensor(a!) decoder) -> str");
m.def(
Expand Down Expand Up @@ -240,6 +242,16 @@ OpsBatchDecodedOutput get_frames_in_range(
stream_index, start, stop, step.value_or(1));
return makeOpsBatchDecodedOutput(result);
}
// Op entry point: unwraps the decoder tensor, copies the timestamps into a
// std::vector, and forwards to the core batch-decode implementation.
OpsBatchDecodedOutput get_frames_by_pts(
    at::Tensor& decoder,
    int64_t stream_index,
    at::ArrayRef<double> timestamps) {
  auto videoDecoder = unwrapTensorToGetDecoder(decoder);
  std::vector<double> ptsSeconds(timestamps.begin(), timestamps.end());
  return makeOpsBatchDecodedOutput(
      videoDecoder->getFramesDisplayedByTimestamps(stream_index, ptsSeconds));
}

OpsBatchDecodedOutput get_frames_by_pts_in_range(
at::Tensor& decoder,
Expand Down Expand Up @@ -479,6 +491,7 @@ TORCH_LIBRARY_IMPL(torchcodec_ns, CPU, m) {
m.impl("get_frames_at_indices", &get_frames_at_indices);
m.impl("get_frames_in_range", &get_frames_in_range);
m.impl("get_frames_by_pts_in_range", &get_frames_by_pts_in_range);
m.impl("get_frames_by_pts", &get_frames_by_pts);
m.impl("_test_frame_pts_equality", &_test_frame_pts_equality);
m.impl(
"scan_all_streams_to_update_metadata",
Expand Down
9 changes: 7 additions & 2 deletions src/torchcodec/decoders/_core/VideoDecoderOps.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ using OpsBatchDecodedOutput = std::tuple<at::Tensor, at::Tensor, at::Tensor>;
// given timestamp T has T >= PTS and T < PTS + Duration.
OpsDecodedOutput get_frame_at_pts(at::Tensor& decoder, double seconds);

// Return the frames at given ptss for a given stream
OpsBatchDecodedOutput get_frames_by_pts(
at::Tensor& decoder,
int64_t stream_index,
at::ArrayRef<double> timestamps);

// Return the frame that is visible at a given index in the video.
OpsDecodedOutput get_frame_at_index(
at::Tensor& decoder,
Expand All @@ -85,8 +91,7 @@ OpsDecodedOutput get_frame_at_index(
// duration as tensors.
OpsDecodedOutput get_next_frame(at::Tensor& decoder);

// Return the frames at a given index for a given stream as a single stacked
// Tensor.
// Return the frames at given indices for a given stream
OpsBatchDecodedOutput get_frames_at_indices(
at::Tensor& decoder,
int64_t stream_index,
Expand Down
1 change: 1 addition & 0 deletions src/torchcodec/decoders/_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
get_frame_at_index,
get_frame_at_pts,
get_frames_at_indices,
get_frames_by_pts,
get_frames_by_pts_in_range,
get_frames_in_range,
get_json_metadata,
Expand Down
16 changes: 16 additions & 0 deletions src/torchcodec/decoders/_core/video_decoder_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def load_torchcodec_extension():
get_frame_at_pts = torch.ops.torchcodec_ns.get_frame_at_pts.default
get_frame_at_index = torch.ops.torchcodec_ns.get_frame_at_index.default
get_frames_at_indices = torch.ops.torchcodec_ns.get_frames_at_indices.default
get_frames_by_pts = torch.ops.torchcodec_ns.get_frames_by_pts.default
get_frames_in_range = torch.ops.torchcodec_ns.get_frames_in_range.default
get_frames_by_pts_in_range = torch.ops.torchcodec_ns.get_frames_by_pts_in_range.default
get_json_metadata = torch.ops.torchcodec_ns.get_json_metadata.default
Expand Down Expand Up @@ -172,6 +173,21 @@ def get_frame_at_pts_abstract(
)


@register_fake("torchcodec_ns::get_frames_by_pts")
def get_frames_by_pts_abstract(
    decoder: torch.Tensor,
    *,
    stream_index: int,
    timestamps: List[float],
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Fake (meta) kernel for get_frames_by_pts.

    Returns placeholder tensors: a 4-d frames tensor whose every dimension is
    dynamic, plus scalar float tensors standing in for the pts and duration
    outputs.
    """
    ctx = get_ctx()
    batched_frames_size = [ctx.new_dynamic_size() for _ in range(4)]
    frames = torch.empty(batched_frames_size)
    pts_seconds = torch.empty([], dtype=torch.float)
    duration_seconds = torch.empty([], dtype=torch.float)
    return frames, pts_seconds, duration_seconds


@register_fake("torchcodec_ns::get_frame_at_index")
def get_frame_at_index_abstract(
decoder: torch.Tensor, *, stream_index: int, frame_index: int
Expand Down
Loading

0 comments on commit cece065

Please sign in to comment.