diff --git a/docarray/document/mixins/video.py b/docarray/document/mixins/video.py index 2a90d940d55..e4e692f5aa6 100644 --- a/docarray/document/mixins/video.py +++ b/docarray/document/mixins/video.py @@ -1,11 +1,21 @@ import threading import time -from typing import Union, BinaryIO, TYPE_CHECKING, Generator, Type, Dict, Optional +from typing import ( + Union, + BinaryIO, + TYPE_CHECKING, + Generator, + Type, + Dict, + Optional, + Tuple, +) import numpy as np if TYPE_CHECKING: from docarray.typing import T + from docarray import Document class VideoDataMixin: @@ -14,6 +24,7 @@ class VideoDataMixin: @classmethod def generator_from_webcam( cls: Type['T'], + height_width: Optional[Tuple[int, int]] = None, show_window: bool = True, window_title: str = 'webcam', fps: int = 30, @@ -26,6 +37,8 @@ def generator_from_webcam( This feature requires the `opencv-python` package. + :param height_width: the shape of the video frame, if not provided, the shape will be determined from the first frame. + Note that this is restricted by the hardware of the camera. :param show_window: if to show preview window of the webcam video :param window_title: the window title of the preview window :param fps: expected frames per second, note that this is not guaranteed, as the actual fps depends on the hardware limit @@ -46,7 +59,11 @@ def generator_from_webcam( try: while not exit_event.is_set(): rval, frame = vc.read() - yield cls(tensor=frame, tags=tags) + d = cls(tensor=frame, tags=tags) # type: Document + if height_width: + d.set_image_tensor_shape(height_width) + + yield d key = cv2.waitKey(1000 // (fps + fps - actual_fps)) @@ -60,7 +77,7 @@ def generator_from_webcam( # putting the FPS count on the frame cv2.putText( - frame, + d.tensor, f'FPS {actual_fps:0.0f}/{fps}', (7, 70), cv2.FONT_HERSHEY_SIMPLEX, @@ -71,7 +88,7 @@ def generator_from_webcam( ) # displaying the frame with fps - cv2.imshow(window_title, frame) + cv2.imshow(window_title, d.tensor) if key == exit_key or not rval: break