From 95bd592a8d51f3fd966a8ffcf37232be85cc4569 Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Wed, 26 Feb 2025 17:27:05 -0500 Subject: [PATCH 1/4] vdh now uses iterator, instead of list-popping when extracting frames --- mmif/utils/video_document_helper.py | 17 +++++++++-------- requirements.cv | 1 + 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/mmif/utils/video_document_helper.py b/mmif/utils/video_document_helper.py index 2bf021fc..a59bff09 100644 --- a/mmif/utils/video_document_helper.py +++ b/mmif/utils/video_document_helper.py @@ -1,7 +1,8 @@ import importlib import math import warnings -from typing import List, Union, Tuple +from typing import Iterable # todo: replace with collections.abc.Iterable in Python 3.9 +from typing import List, Union, Tuple, Iterator import mmif from mmif import Annotation, Document, Mmif @@ -67,36 +68,36 @@ def get_framerate(video_document: Document) -> float: return video_document.get_property(FPS_DOCPROP_KEY) -def extract_frames_as_images(video_document: Document, framenums: List[int], as_PIL: bool = False): +def extract_frames_as_images(video_document: Document, framenums: Iterable[int], as_PIL: bool = False): """ Extracts frames from a video document as a list of :py:class:`numpy.ndarray`. Use with :py:func:`sample_frames` function to get the list of frame numbers first. 
:param video_document: :py:class:`~mmif.serialize.annotation.Document` instance that holds a video document (``"@type": ".../VideoDocument/..."``) - :param framenums: integers representing the frame numbers to extract + :param framenums: iterable integers representing the frame numbers to extract :param as_PIL: return :py:class:`PIL.Image.Image` instead of :py:class:`~numpy.ndarray` :return: frames as a list of :py:class:`~numpy.ndarray` or :py:class:`~PIL.Image.Image` """ - import cv2 # pytype: disable=import-error if as_PIL: from PIL import Image frames = [] video = capture(video_document) cur_f = 0 tot_fcount = video_document.get_property(FRAMECOUNT_DOCPROP_KEY) - framenums_copy = framenums.copy() + framenumi = iter(framenums) # make sure that it's actually an iterator, in case a list is passed + next_target_f = next(framenumi, None) while True: - if not framenums_copy or cur_f > tot_fcount: + if next_target_f is None or cur_f > tot_fcount: break ret, frame = video.read() - if cur_f == framenums_copy[0]: + if cur_f == next_target_f: if not ret: sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY)) warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id}.') cur_f += 1 continue frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame) - framenums_copy.pop(0) + next_target_f = next(framenumi, None) cur_f += 1 return frames diff --git a/requirements.cv b/requirements.cv index 47ef0a21..a2c1cfb9 100644 --- a/requirements.cv +++ b/requirements.cv @@ -1,3 +1,4 @@ pillow opencv-python ffmpeg-python +wurlitzer From 52762c0929ed46d5113dcf228467131c1a7c736a Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Wed, 26 Feb 2025 18:31:55 -0500 Subject: [PATCH 2/4] vdh frame extraction now redirects ffmpeg errors for downstream --- mmif/utils/video_document_helper.py | 47 ++++++++++++++++++----------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/mmif/utils/video_document_helper.py 
b/mmif/utils/video_document_helper.py index a59bff09..ed03707d 100644 --- a/mmif/utils/video_document_helper.py +++ b/mmif/utils/video_document_helper.py @@ -1,15 +1,16 @@ import importlib import math import warnings +from io import StringIO from typing import Iterable # todo: replace with collections.abc.Iterable in Python 3.9 -from typing import List, Union, Tuple, Iterator +from typing import List, Union, Tuple import mmif from mmif import Annotation, Document, Mmif from mmif.utils.timeunit_helper import convert from mmif.vocabulary import DocumentTypes -for cv_dep in ('cv2', 'ffmpeg', 'PIL'): +for cv_dep in ('cv2', 'ffmpeg', 'PIL', 'wurlitzer'): try: importlib.__import__(cv_dep) except ImportError as e: @@ -64,11 +65,13 @@ def get_framerate(video_document: Document) -> float: if k in video_document: fps = round(video_document.get_property(k), 2) return fps - capture(video_document) - return video_document.get_property(FPS_DOCPROP_KEY) + cap = capture(video_document) + fps = video_document.get_property(FPS_DOCPROP_KEY) + cap.release() + return fps -def extract_frames_as_images(video_document: Document, framenums: Iterable[int], as_PIL: bool = False): +def extract_frames_as_images(video_document: Document, framenums: Iterable[int], as_PIL: bool = False, record_ffmpeg_errors: bool = False): """ Extracts frames from a video document as a list of :py:class:`numpy.ndarray`. Use with :py:func:`sample_frames` function to get the list of frame numbers first. 
@@ -86,19 +89,27 @@ def extract_frames_as_images(video_document: Document, framenums: Iterable[int], tot_fcount = video_document.get_property(FRAMECOUNT_DOCPROP_KEY) framenumi = iter(framenums) # make sure that it's actually an iterator, in case a list is passed next_target_f = next(framenumi, None) - while True: - if next_target_f is None or cur_f > tot_fcount: - break - ret, frame = video.read() - if cur_f == next_target_f: - if not ret: - sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY)) - warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id}.') - cur_f += 1 - continue - frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame) - next_target_f = next(framenumi, None) - cur_f += 1 + from wurlitzer import pipes as cpipes + ffmpeg_outs = StringIO() + ffmpeg_errs = StringIO() + with cpipes(stderr=ffmpeg_errs, stdout=ffmpeg_outs): + while True: + if next_target_f is None or cur_f > tot_fcount: + break + ret, frame = video.read() + if cur_f == next_target_f: + if not ret: + sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY)) + warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id} @ {video_document.location} .') + cur_f += 1 + continue + frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame) + next_target_f = next(framenumi, None) + cur_f += 1 + ffmpeg_err_str = ffmpeg_errs.getvalue() + if ffmpeg_err_str and record_ffmpeg_errors: + warnings.warn(f'FFmpeg output during extracting frames: {ffmpeg_err_str}') + video.release() return frames From a12a17e4b13790f8ed1b41ec71a9fa036c9d0684 Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Wed, 26 Feb 2025 18:32:55 -0500 Subject: [PATCH 3/4] fixed bug of early terminating seeking video when frame extraction fails --- mmif/utils/video_document_helper.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mmif/utils/video_document_helper.py 
b/mmif/utils/video_document_helper.py index ed03707d..436d0761 100644 --- a/mmif/utils/video_document_helper.py +++ b/mmif/utils/video_document_helper.py @@ -101,9 +101,8 @@ def extract_frames_as_images(video_document: Document, framenums: Iterable[int], if not ret: sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY)) warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id} @ {video_document.location} .') - cur_f += 1 - continue - frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame) + else: + frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame) next_target_f = next(framenumi, None) cur_f += 1 ffmpeg_err_str = ffmpeg_errs.getvalue() From 1688804a778b9eccf1a0f00ec51207443495fdc0 Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Fri, 28 Feb 2025 21:07:34 -0500 Subject: [PATCH 4/4] vdh now can fastforward 1k frames when sampled frame gap is larger (speed up video seeking) --- mmif/utils/video_document_helper.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/mmif/utils/video_document_helper.py b/mmif/utils/video_document_helper.py index 436d0761..c8a7be64 100644 --- a/mmif/utils/video_document_helper.py +++ b/mmif/utils/video_document_helper.py @@ -1,4 +1,6 @@ import importlib +import sys + import math import warnings from io import StringIO @@ -81,21 +83,30 @@ def extract_frames_as_images(video_document: Document, framenums: Iterable[int], :param as_PIL: return :py:class:`PIL.Image.Image` instead of :py:class:`~numpy.ndarray` :return: frames as a list of :py:class:`~numpy.ndarray` or :py:class:`~PIL.Image.Image` """ + import cv2 if as_PIL: from PIL import Image frames = [] video = capture(video_document) cur_f = 0 tot_fcount = video_document.get_property(FRAMECOUNT_DOCPROP_KEY) + # when the target frame is more than this many frames away, fast-forward instead of reading frame by frame + # this is sanity-checked with a small number of video
samples + # (frame-by-frame ndarrays are compared with fast-forwarded ndarrays) + skip_threadhold = 1000 framenumi = iter(framenums) # make sure that it's actually an iterator, in case a list is passed next_target_f = next(framenumi, None) from wurlitzer import pipes as cpipes - ffmpeg_outs = StringIO() ffmpeg_errs = StringIO() - with cpipes(stderr=ffmpeg_errs, stdout=ffmpeg_outs): + with cpipes(stderr=ffmpeg_errs, stdout=sys.stdout): while True: - if next_target_f is None or cur_f > tot_fcount: + if next_target_f is None or cur_f > tot_fcount or next_target_f > tot_fcount: break + if next_target_f - cur_f > skip_threadhold: + while next_target_f - cur_f > skip_threadhold: + cur_f += skip_threadhold + else: + video.set(cv2.CAP_PROP_POS_FRAMES, cur_f) ret, frame = video.read() if cur_f == next_target_f: if not ret: