From 95bd592a8d51f3fd966a8ffcf37232be85cc4569 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Wed, 26 Feb 2025 17:27:05 -0500
Subject: [PATCH 1/4] vdh now uses iterator, instead of list-popping when
 extracting frames

---
 mmif/utils/video_document_helper.py | 17 +++++++++--------
 requirements.cv                     |  1 +
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/mmif/utils/video_document_helper.py b/mmif/utils/video_document_helper.py
index 2bf021fc..a59bff09 100644
--- a/mmif/utils/video_document_helper.py
+++ b/mmif/utils/video_document_helper.py
@@ -1,7 +1,8 @@
 import importlib
 import math
 import warnings
-from typing import List, Union, Tuple
+from typing import Iterable  # todo: replace with collections.abc.Iterable in Python 3.9
+from typing import List, Union, Tuple, Iterator
 
 import mmif
 from mmif import Annotation, Document, Mmif
@@ -67,36 +68,36 @@ def get_framerate(video_document: Document) -> float:
     return video_document.get_property(FPS_DOCPROP_KEY)
 
 
-def extract_frames_as_images(video_document: Document, framenums: List[int], as_PIL: bool = False):
+def extract_frames_as_images(video_document: Document, framenums: Iterable[int], as_PIL: bool = False):
     """
     Extracts frames from a video document as a list of :py:class:`numpy.ndarray`.
     Use with :py:func:`sample_frames` function to get the list of frame numbers first. 
     
     :param video_document: :py:class:`~mmif.serialize.annotation.Document` instance that holds a video document (``"@type": ".../VideoDocument/..."``)
-    :param framenums: integers representing the frame numbers to extract
+    :param framenums: iterable of integers representing the frame numbers to extract
     :param as_PIL: return :py:class:`PIL.Image.Image` instead of :py:class:`~numpy.ndarray`
     :return: frames as a list of :py:class:`~numpy.ndarray` or :py:class:`~PIL.Image.Image`
     """
-    import cv2  # pytype: disable=import-error
     if as_PIL:
         from PIL import Image
     frames = []
     video = capture(video_document)
     cur_f = 0
     tot_fcount = video_document.get_property(FRAMECOUNT_DOCPROP_KEY)
-    framenums_copy = framenums.copy()
+    framenumi = iter(framenums)  # make sure that it's actually an iterator, in case a list is passed
+    next_target_f = next(framenumi, None)
     while True:
-        if not framenums_copy or cur_f > tot_fcount:
+        if next_target_f is None or cur_f > tot_fcount:
             break
         ret, frame = video.read()
-        if cur_f == framenums_copy[0]:
+        if cur_f == next_target_f:
             if not ret:
                 sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY))
                 warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id}.')
                 cur_f += 1
                 continue
             frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
-            framenums_copy.pop(0)
+            next_target_f = next(framenumi, None)
         cur_f += 1
     return frames
 
diff --git a/requirements.cv b/requirements.cv
index 47ef0a21..a2c1cfb9 100644
--- a/requirements.cv
+++ b/requirements.cv
@@ -1,3 +1,4 @@
 pillow
 opencv-python
 ffmpeg-python
+wurlitzer

From 52762c0929ed46d5113dcf228467131c1a7c736a Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Wed, 26 Feb 2025 18:31:55 -0500
Subject: [PATCH 2/4] vdh frame extraction now redirects ffmpeg errors for
 downstream

---
 mmif/utils/video_document_helper.py | 47 ++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/mmif/utils/video_document_helper.py b/mmif/utils/video_document_helper.py
index a59bff09..ed03707d 100644
--- a/mmif/utils/video_document_helper.py
+++ b/mmif/utils/video_document_helper.py
@@ -1,15 +1,16 @@
 import importlib
 import math
 import warnings
+from io import StringIO
 from typing import Iterable  # todo: replace with collections.abc.Iterable in Python 3.9
-from typing import List, Union, Tuple, Iterator
+from typing import List, Union, Tuple
 
 import mmif
 from mmif import Annotation, Document, Mmif
 from mmif.utils.timeunit_helper import convert
 from mmif.vocabulary import DocumentTypes
 
-for cv_dep in ('cv2', 'ffmpeg', 'PIL'):
+for cv_dep in ('cv2', 'ffmpeg', 'PIL', 'wurlitzer'):
     try:
         importlib.__import__(cv_dep)
     except ImportError as e:
@@ -64,11 +65,13 @@ def get_framerate(video_document: Document) -> float:
         if k in video_document:
             fps = round(video_document.get_property(k), 2)
             return fps
-    capture(video_document)
-    return video_document.get_property(FPS_DOCPROP_KEY)
+    cap = capture(video_document)
+    fps = video_document.get_property(FPS_DOCPROP_KEY)
+    cap.release()
+    return fps
 
 
-def extract_frames_as_images(video_document: Document, framenums: Iterable[int], as_PIL: bool = False):
+def extract_frames_as_images(video_document: Document, framenums: Iterable[int], as_PIL: bool = False, record_ffmpeg_errors: bool = False):
     """
     Extracts frames from a video document as a list of :py:class:`numpy.ndarray`.
     Use with :py:func:`sample_frames` function to get the list of frame numbers first. 
@@ -86,19 +89,27 @@ def extract_frames_as_images(video_document: Document, framenums: Iterable[int],
     tot_fcount = video_document.get_property(FRAMECOUNT_DOCPROP_KEY)
     framenumi = iter(framenums)  # make sure that it's actually an iterator, in case a list is passed
     next_target_f = next(framenumi, None)
-    while True:
-        if next_target_f is None or cur_f > tot_fcount:
-            break
-        ret, frame = video.read()
-        if cur_f == next_target_f:
-            if not ret:
-                sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY))
-                warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id}.')
-                cur_f += 1
-                continue
-            frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
-            next_target_f = next(framenumi, None)
-        cur_f += 1
+    from wurlitzer import pipes as cpipes
+    ffmpeg_outs = StringIO()
+    ffmpeg_errs = StringIO()
+    with cpipes(stderr=ffmpeg_errs, stdout=ffmpeg_outs):
+        while True:
+            if next_target_f is None or cur_f > tot_fcount:
+                break
+            ret, frame = video.read()
+            if cur_f == next_target_f:
+                if not ret:
+                    sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY))
+                    warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id} @ {video_document.location} .')
+                    cur_f += 1
+                    continue
+                frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
+                next_target_f = next(framenumi, None)
+            cur_f += 1
+    ffmpeg_err_str = ffmpeg_errs.getvalue()
+    if ffmpeg_err_str and record_ffmpeg_errors:
+        warnings.warn(f'FFmpeg output during extracting frames: {ffmpeg_err_str}')
+    video.release()
     return frames
 
 

From a12a17e4b13790f8ed1b41ec71a9fa036c9d0684 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Wed, 26 Feb 2025 18:32:55 -0500
Subject: [PATCH 3/4] fixed bug of terminating video seeking early when frame
 extraction fails

---
 mmif/utils/video_document_helper.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mmif/utils/video_document_helper.py b/mmif/utils/video_document_helper.py
index ed03707d..436d0761 100644
--- a/mmif/utils/video_document_helper.py
+++ b/mmif/utils/video_document_helper.py
@@ -101,9 +101,8 @@ def extract_frames_as_images(video_document: Document, framenums: Iterable[int],
                 if not ret:
                     sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY))
                     warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id} @ {video_document.location} .')
-                    cur_f += 1
-                    continue
-                frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
+                else:
+                    frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
                 next_target_f = next(framenumi, None)
             cur_f += 1
     ffmpeg_err_str = ffmpeg_errs.getvalue()

From 1688804a778b9eccf1a0f00ec51207443495fdc0 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Fri, 28 Feb 2025 21:07:34 -0500
Subject: [PATCH 4/4] vdh can now fast-forward in 1k-frame steps when the next
 sampled frame is more than 1k frames away (speeds up video seeking)

---
 mmif/utils/video_document_helper.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/mmif/utils/video_document_helper.py b/mmif/utils/video_document_helper.py
index 436d0761..c8a7be64 100644
--- a/mmif/utils/video_document_helper.py
+++ b/mmif/utils/video_document_helper.py
@@ -1,4 +1,6 @@
 import importlib
+import sys
+
 import math
 import warnings
 from io import StringIO
@@ -81,21 +83,30 @@ def extract_frames_as_images(video_document: Document, framenums: Iterable[int],
     :param as_PIL: return :py:class:`PIL.Image.Image` instead of :py:class:`~numpy.ndarray`
     :return: frames as a list of :py:class:`~numpy.ndarray` or :py:class:`~PIL.Image.Image`
     """
+    import cv2
     if as_PIL:
         from PIL import Image
     frames = []
     video = capture(video_document)
     cur_f = 0
     tot_fcount = video_document.get_property(FRAMECOUNT_DOCPROP_KEY)
+    # when the target frame is more than this many frames away, fast-forward instead of reading frame by frame
+    # this was sanity-checked on a small number of video samples
+    # (frame-by-frame ndarrays were compared with fast-forwarded ndarrays)
+    skip_threadhold = 1000  
     framenumi = iter(framenums)  # make sure that it's actually an iterator, in case a list is passed
     next_target_f = next(framenumi, None)
     from wurlitzer import pipes as cpipes
-    ffmpeg_outs = StringIO()
     ffmpeg_errs = StringIO()
-    with cpipes(stderr=ffmpeg_errs, stdout=ffmpeg_outs):
+    with cpipes(stderr=ffmpeg_errs, stdout=sys.stdout):
         while True:
-            if next_target_f is None or cur_f > tot_fcount:
+            if next_target_f is None or cur_f > tot_fcount or next_target_f > tot_fcount:
                 break
+            if next_target_f - cur_f > skip_threadhold:
+                while next_target_f - cur_f > skip_threadhold:
+                    cur_f += skip_threadhold
+                else:
+                    video.set(cv2.CAP_PROP_POS_FRAMES, cur_f)
             ret, frame = video.read()
             if cur_f == next_target_f:
                 if not ret: