From 85e65d45eae81e6fcbb48e8b5d7a050e2835aece Mon Sep 17 00:00:00 2001
From: Scott Schneider
Date: Tue, 18 Jun 2024 14:57:38 -0700
Subject: [PATCH] [torchcodec] initial version of simple video decoder (#42)

Summary:
Pull Request resolved: https://github.com/pytorch-labs/torchcodec/pull/42

Initial version of `SimpleVideoDecoder`. This diff supports:

1. Creating a simple decoder from file or tensor.
2. Accessing frames through random access: `decoder[i]`. Internally, this gets a frame by index.
3. Iterating over all available frames: `for frame in decoder`. Internally, this calls the core library function to get the next frame.

I did not implement slice semantics for random access yet. I think that in order to support that best we'll need to add new capabilities in the core API to support getting a batch of frames from a range. We *could* use `get_frames_by_indices`, but we would end up creating very large lists.

This is a partial implementation of the design: https://fburl.com/gdoc/hxotv2by

Reviewed By: NicolasHug, ahmadsharif1

Differential Revision: D58605628
---

Reviewer notes (annotations only; nothing in this section changes the diff):

  src/torchcodec/decoders/_simple_video_decoder.py
  * __init__ picks the core constructor from the type of `source`
    (str -> create_from_file, bytes -> create_from_bytes,
    torch.Tensor -> create_from_tensor) and raises TypeError for anything
    else. It then calls core.add_video_stream and caches "numFrames" and
    "bestVideoStreamIndex" from the JSON metadata.
  * __getitem__ adds the length to a negative key before the bounds check,
    so the IndexError message shows the adjusted value, not the one the
    caller passed (decoder[-1000] on the 390-frame test video reports
    "Index -610"). The isinstance(key, int) check also lets bool through,
    because bool is a subclass of int.
  * __iter__ returns self, so the decoder is its own iterator. __next__
    turns every RuntimeError from core.get_next_frame into StopIteration;
    the in-code TODO already flags that real errors are indistinguishable
    from end-of-stream. Whether a second `for` loop restarts, and whether
    decoder[i] moves the iteration position, depends on core -- to confirm.

  test/decoders/simple_video_decoder_test.py
  * test_next compares frames only at i == 0, 1, 180 and 389 and never
    checks how many frames were yielded, so it also passes when iteration
    stops early (including yielding nothing).

  .pre-commit-config.yaml
  * The ufmt hook is commented out rather than deleted; the commit message
    does not say why.

 .pre-commit-config.yaml                       | 14 ++--
 src/torchcodec/decoders/__init__.py           |  1 +
 .../decoders/_simple_video_decoder.py         | 62 ++++++++++++++
 test/decoders/simple_video_decoder_test.py    | 83 +++++++++++++++++++
 4 files changed, 153 insertions(+), 7 deletions(-)
 create mode 100644 src/torchcodec/decoders/__init__.py
 create mode 100644 src/torchcodec/decoders/_simple_video_decoder.py
 create mode 100644 test/decoders/simple_video_decoder_test.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d381a5a3..100eea43 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,13 +14,13 @@ repos:
       - id: check-added-large-files
         args: ['--maxkb=1000']
 
-  - repo: https://github.com/omnilib/ufmt
-    rev: v2.6.0
-    hooks:
-      - id: ufmt
-        additional_dependencies:
-          - black == 24.4.2
-          - usort == 1.0.5
+  # - repo: https://github.com/omnilib/ufmt
+  #   rev: v2.6.0
+  #   hooks:
+  #     - id: ufmt
+  #       additional_dependencies:
+  #         - black == 24.4.2
+  #         - usort == 1.0.5
 
   - repo: https://github.com/PyCQA/flake8
     rev: 7.1.0
diff --git a/src/torchcodec/decoders/__init__.py b/src/torchcodec/decoders/__init__.py
new file mode 100644
index 00000000..8a8adca4
--- /dev/null
+++ b/src/torchcodec/decoders/__init__.py
@@ -0,0 +1 @@
+from ._simple_video_decoder import SimpleVideoDecoder  # noqa
diff --git a/src/torchcodec/decoders/_simple_video_decoder.py b/src/torchcodec/decoders/_simple_video_decoder.py
new file mode 100644
index 00000000..d628b1c3
--- /dev/null
+++ b/src/torchcodec/decoders/_simple_video_decoder.py
@@ -0,0 +1,62 @@
+import json
+from typing import Union
+
+import torch
+from torchcodec.decoders import _core as core
+
+
+class SimpleVideoDecoder:
+
+    def __init__(self, source: Union[str, bytes, torch.Tensor]):
+        # TODO: support Path objects.
+        if isinstance(source, str):
+            self._decoder = core.create_from_file(source)
+        elif isinstance(source, bytes):
+            self._decoder = core.create_from_bytes(source)
+        elif isinstance(source, torch.Tensor):
+            self._decoder = core.create_from_tensor(source)
+        else:
+            raise TypeError(
+                f"Unknown source type: {type(source)}. "
+                "Supported types are str, bytes and Tensor."
+            )
+
+        core.add_video_stream(self._decoder)
+
+        # TODO: We should either implement specific core library function to
+        # retrieve these values, or replace this with a non-JSON metadata
+        # retrieval.
+        metadata_json = json.loads(core.get_json_metadata(self._decoder))
+        self._num_frames = metadata_json["numFrames"]
+        self._stream_index = metadata_json["bestVideoStreamIndex"]
+
+    def __len__(self) -> int:
+        return self._num_frames
+
+    def __getitem__(self, key: int) -> torch.Tensor:
+        if not isinstance(key, int):
+            raise TypeError(
+                f"Unsupported key type: {type(key)}. Supported type is int."
+            )
+
+        if key < 0:
+            key += self._num_frames
+        if key >= self._num_frames or key < 0:
+            raise IndexError(
+                f"Index {key} is out of bounds; length is {self._num_frames}"
+            )
+
+        return core.get_frame_at_index(
+            self._decoder, frame_index=key, stream_index=self._stream_index
+        )
+
+    def __iter__(self) -> "SimpleVideoDecoder":
+        return self
+
+    def __next__(self) -> torch.Tensor:
+        # TODO: We should distinguish between expected end-of-file and unexpected
+        # runtime error.
+        try:
+            return core.get_next_frame(self._decoder)
+        except RuntimeError:
+            raise StopIteration()
diff --git a/test/decoders/simple_video_decoder_test.py b/test/decoders/simple_video_decoder_test.py
new file mode 100644
index 00000000..2ee5c3e7
--- /dev/null
+++ b/test/decoders/simple_video_decoder_test.py
@@ -0,0 +1,83 @@
+import pytest
+
+from torchcodec.decoders import SimpleVideoDecoder
+
+from ..test_utils import (
+    assert_equal,
+    get_reference_video_path,
+    get_reference_video_tensor,
+    load_tensor_from_file,
+)
+
+
+class TestSimpleDecoder:
+
+    def test_create_from_file(self):
+        decoder = SimpleVideoDecoder(str(get_reference_video_path()))
+        assert len(decoder) == 390
+        assert decoder._stream_index == 3
+
+    def test_create_from_tensor(self):
+        decoder = SimpleVideoDecoder(get_reference_video_tensor())
+        assert len(decoder) == 390
+        assert decoder._stream_index == 3
+
+    def test_create_from_bytes(self):
+        path = str(get_reference_video_path())
+        with open(path, "rb") as f:
+            video_bytes = f.read()
+
+        decoder = SimpleVideoDecoder(video_bytes)
+        assert len(decoder) == 390
+        assert decoder._stream_index == 3
+
+    def test_create_fails(self):
+        with pytest.raises(TypeError, match="Unknown source type"):
+            decoder = SimpleVideoDecoder(123)  # noqa
+
+    def test_getitem_int(self):
+        decoder = SimpleVideoDecoder(str(get_reference_video_path()))
+
+        ref_frame0 = load_tensor_from_file("nasa_13013.mp4.frame000001.pt")
+        ref_frame1 = load_tensor_from_file("nasa_13013.mp4.frame000002.pt")
+        ref_frame180 = load_tensor_from_file("nasa_13013.mp4.time6.000000.pt")
+        ref_frame_last = load_tensor_from_file("nasa_13013.mp4.time12.979633.pt")
+
+        assert_equal(ref_frame0, decoder[0])
+        assert_equal(ref_frame1, decoder[1])
+        assert_equal(ref_frame180, decoder[180])
+        assert_equal(ref_frame_last, decoder[-1])
+
+    def test_getitem_fails(self):
+        decoder = SimpleVideoDecoder(str(get_reference_video_path()))
+
+        with pytest.raises(IndexError, match="out of bounds"):
+            frame = decoder[1000]  # noqa
+
+        with pytest.raises(IndexError, match="out of bounds"):
+            frame = decoder[-1000]  # noqa
+
+        with pytest.raises(TypeError, match="Unsupported key type"):
+            frame = decoder["0"]  # noqa
+
+    def test_next(self):
+        decoder = SimpleVideoDecoder(str(get_reference_video_path()))
+
+        ref_frame0 = load_tensor_from_file("nasa_13013.mp4.frame000001.pt")
+        ref_frame1 = load_tensor_from_file("nasa_13013.mp4.frame000002.pt")
+        ref_frame180 = load_tensor_from_file("nasa_13013.mp4.time6.000000.pt")
+        ref_frame_last = load_tensor_from_file("nasa_13013.mp4.time12.979633.pt")
+
+        for i, frame in enumerate(decoder):
+            if i == 0:
+                assert_equal(ref_frame0, frame)
+            elif i == 1:
+                assert_equal(ref_frame1, frame)
+            elif i == 180:
+                assert_equal(ref_frame180, frame)
+            elif i == 389:
+                assert_equal(ref_frame_last, frame)
+
+
+if __name__ == "__main__":
+    pytest.main()