Module vipy.video
import os
import sys
import dill
from vipy.globals import print
from vipy.util import remkdir, tempMP4, isurl, \
isvideourl, templike, tempjpg, filetail, tempdir, isyoutubeurl, try_import, isnumpy, temppng, \
istuple, islist, isnumber, tolist, filefull, fileext, isS3url, totempdir, flatlist, tocache, \
premkdir, writecsv, iswebp, ispng, isgif, filepath, Stopwatch, toextension, isjsonfile, isRTSPurl, isRTMPurl
from vipy.image import Image
import vipy.geometry
import vipy.math
import vipy.image
import vipy.downloader
import vipy.globals
import vipy.activity
import copy
import numpy as np
import ffmpeg
import urllib.request
import urllib.error
import urllib.parse
from urllib.parse import urlparse, urlunparse
import http.client as httplib
import io
import PIL.Image
import warnings
import shutil
import types
import uuid
import platform
import time
from io import BytesIO
import itertools
import hashlib
from pathlib import PurePath
import queue
import threading
import collections
try:
import ujson as json # faster
except ImportError:
import json
ffmpeg_exe = shutil.which('ffmpeg')
has_ffmpeg = ffmpeg_exe is not None and os.path.exists(ffmpeg_exe)
ffprobe_exe = shutil.which('ffprobe')
has_ffprobe = ffprobe_exe is not None and os.path.exists(ffprobe_exe)
ffplay_exe = shutil.which('ffplay')
has_ffplay = ffplay_exe is not None and os.path.exists(ffplay_exe)
class Stream(object):
"""vipy.video.Stream class.
This class is the primary mechanism for streaming frames and clips from long videos or live video streams.
- The stream is constructed from a shared underlying video in self._video.
- As the shared video is updated with annotations, the stream can generate frames and clips that contain these annotations
- The shared video allows for multiple concurrent iterators all sourced from the same video, iterating over different frames, clips and rates
- The iterator leverages a pipe to FFMPEG, reading numpy frames from the video filter chain.
- The pipe is written from a thread which is dedicated to reading frames from ffmpeg
- Each numpy frame is added to a queue, with a null terminator when end of stream is reached
- The iterator then reads from the queue, and returns annotated frames
This iterator can also be used as a buffered stream. Buffered streams have a primary iterator which saves a fixed stream buffer
of frames so that subsequent iterators can pull temporally aligned frames. This is useful to avoid having multiple FFMPEG pipes
open simultaneously, and can allow for synchronized access to live video streams without timestamping.
- The primary iterator is the first iterator over the video with stream(buffered=True)
- The primary iterator creates a private attribute self._video.attributes['__stream_buffer'] which caches frames
- The stream buffer saves numpy arrays from the iterator with a fixed buffer length (number of frames)
- The secondary iterator (e.g. any iterator that accesses the video after the primary iterator is initially created) will read from the stream buffer
- All iterators share the underlying self._video object in the stream so that if the video annotations are updated by an iterator, the annotated frames are accessible in the iterators
- The secondary iterators are synchronized to the stream buffer that is read by the primary iterator. This is useful for synchronizing streams for live camera streams without absolute timestamps.
- There can be an unlimited number of secondary iterators, without incurring a penalty on frame access
This iterator can iterate over clips, frames or batches.
- A clip is a sequence of frames such that each clip is separated by a fixed number of frames.
- Clips are useful for temporal encoding of short atomic activities
- A batch is a sequence of n frames with a stride of n.
- A batch is useful for iterating over groups of frames that are operated in parallel on a GPU
```python
for (im1, im2, v3) in zip(v.stream(buffered=True), v.stream(buffered=True).frame(delay=30), v.stream(buffered=True).clip(n=16,m=1)):
# im1: `vipy.image.Scene` at frame index k
# im2: `vipy.image.Scene` at frame index k-30
# v3: `vipy.video.Scene` at frame range [k-16, k]
```
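A minimal sketch of batch iteration (assuming `v` is a `vipy.video.Video`), where batches do not overlap and the final batch may be ragged:
```python
for vb in v.stream().batch(n=16):
    print(vb)  # a video of up to 16 frames, suitable for GPU batch processing
```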
.. note::
- This is designed to be accessed as `vipy.video.Video.stream` and not accessed as a standalone class.
"""
def __init__(self, v, queuesize, write, overwrite, bitrate=None, buffered=False, bufsize=256, rebuffered=False):
self._video = v # do not clone
self._write_pipe = None
self._vcodec = 'libx264'
self._bitrate = bitrate # e.g. '2000k', recommended settings for live streaming
self._framerate = self._video.framerate()
self._outfile = self._video.filename()
self._write = write or overwrite
assert self._write is False or (overwrite is True or not os.path.exists(self._outfile)), "Output file '%s' exists - Writable stream cannot overwrite existing video file unless overwrite=True" % self._outfile
if overwrite and os.path.exists(self._outfile):
os.remove(self._outfile)
self._shape = self._video.shape() if (not self._write) or (not self._video.isstreaming() and self._video.canload()) else None # shape for write can be defined by first frame
assert (write is True or overwrite is True) or self._shape is not None, "Invalid video '%s'" % (str(v))
self._queuesize = queuesize
self._bufsize = bufsize
self._buffered = buffered
self._is_stream_buffer_owner = False
assert self._bufsize >= 1
if rebuffered:
self._video.attributes.pop("__stream_buffer", None) # force reinitialization
def __enter__(self):
"""Write pipe context manager"""
assert self._write, "invalid parameters for write only context manager"
if self._shape is not None:
(height, width) = self._shape
outfile = self._outfile if self._outfile is not None else self._url # may be youtube/twitch live stream
outrate = 30 if vipy.util.isRTMPurl(outfile) else self._video.framerate()
fiv = (ffmpeg.input('pipe:', format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(width, height), r=self._video.framerate())
.filter('pad', 'ceil(iw/2)*2', 'ceil(ih/2)*2'))
fi = ffmpeg.concat(fiv.filter('fps', fps=30, round='up'), ffmpeg.input('anullsrc', f='lavfi'), v=1, a=1) if isRTMPurl(outfile) else fiv # empty audio for youtube-live
kwargs = {'video_bitrate':self._bitrate} if self._bitrate is not None else {}
fo = (fi.output(filename=self._outfile if self._outfile is not None else self._url,
pix_fmt='yuv420p',
vcodec=self._vcodec,
f='flv' if vipy.util.isRTMPurl(outfile) else vipy.util.fileext(outfile, withdot=False),
g=2*outrate,
**kwargs)
.overwrite_output()
.global_args('-cpuflags', '0', '-loglevel', 'quiet' if not vipy.globals.isdebug() else 'debug'))
self._write_pipe = fo.run_async(pipe_stdin=True)
self._writeindex = 0
return self
def __exit__(self, type=None, value=None, tb=None):
"""Write pipe context manager
.. note:: This is triggered on ctrl-c as the last step for cleanup
"""
if self._write_pipe is not None:
self._write_pipe.stdin.close()
self._write_pipe.wait()
del self._write_pipe
self._write_pipe = None
if type is not None:
raise
return self
def __call__(self, im):
"""alias for write()"""
return self.write(im)
def __reduce__(self):
"""When pickling, ignore all open pipes, restore as read-only stream. User is responsible for restoring the stream after unpickling."""
return (self.__class__, (self._video, self._queuesize, False, False, self._bitrate, self._buffered, self._bufsize, True))
def _read_pipe(self):
if not self._video.isloaded():
p = self._video._ffmpeg.output('pipe:', format='rawvideo', pix_fmt='rgb24').global_args('-nostdin', '-loglevel', 'debug' if vipy.globals.isdebug() else 'quiet').run_async(pipe_stdout=True, pipe_stderr=True)
assert p is not None, "Invalid read pipe"
p.poll()
return p
else:
return None
def framerate(self):
return self._video.framerate()
def __iter__(self):
"""Stream individual video frames.
"""
try:
if self._video.isloaded():
# For loaded video, just use the existing iterator for in-memory video
for k in range(len(self._video)):
yield self._video[k]
else:
# First stream iterator: read from video and store in stream buffer for all other iterators to access
if self._buffered and not self._video.hasattribute('__stream_buffer'):
self._video.attributes['__stream_buffer'] = {} # for synchronized frames with secondary iterator
self._is_stream_buffer_owner = True # track which iterator created the stream buffer for cleanup in 'finally'
# Video pipe thread:
# - Initialized only if not buffered (the default) or if primary iterator
# - read numpy frames from the ffmpeg filter chain via a pipe
# - store the resulting frames in a queue with a null terminated frame when the stream ends
# - Threading is useful here because there is often time to switch when waiting on GPU I/O
(t,q,p) = (None, None,None)
if not self._buffered or self._is_stream_buffer_owner:
p = self._read_pipe()
q = queue.Queue(self._queuesize)
(h, w) = self._shape
def _f_threadloop(pipe, queue, height, width, event):
assert pipe is not None, "Invalid pipe"
assert queue is not None, "invalid queue"
f = 0
while True:
in_bytes = pipe.stdout.read(height * width * 3)
if not in_bytes:
queue.put(None)
pipe.poll()
pipe.wait()
queue.join()
event.wait()
if pipe.returncode != 0:
raise ValueError('Stream iterator exited with returncode %d' % (pipe.returncode))
break
else:
queue.put(np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3]))
e = threading.Event()
t = threading.Thread(target=_f_threadloop, args=(p, q, h, w, e), daemon=True, name='vipy.video.Stream.__iter__')
t.start()
# Stream iterator:
# -read frames from the thread queue and write to the private stream buffer stored as a video attribute with a frame index
# -Frames are also yielded for the primary iterator
# -The stream buffer is a dictionary that is n frames long, and if the newest frame from the pipe is frame k, the oldest frame is k-n which is yielded first
# -If the stream is unbuffered, just read from the queue directly and yield the numpy frame
b = self._video.attributes['__stream_buffer'] if self._buffered else None # buffer (if requested)
k = 0 # current frame
while True:
if not self._buffered or self._is_stream_buffer_owner:
# Primary iterator: read from thread queue
if b is None:
(f, img) = (k, q.get()) # unbuffered: to yield latest from thread queue directly
q.task_done()
k += 1
else:
b[k] = q.get() # add to stream buffer, cache frames only, annotations are added synchronously on yield
q.task_done()
(f, img) = (k, b[k]) # primary buffer: yield current frame from pipe
k += 1
else:
# Secondary iterator: read from stream buffer
while k not in b:
# "Event" wait: wait for primary iterator to start up and fill buffer, after it starts filling, this sleep should be unnecessary
time.sleep(0.001)
(f, img) = (k, b[k]) # secondary buffers: yield from stream buffer
k += 1
if img is not None:
yield self._video.frame(f, img) # yield a vipy.image.Scene object with annotations at frame f, using the latest annotation from the shared video object and shallow copy of img
if b is not None and self._is_stream_buffer_owner:
if len(b) > self._bufsize:
del b[min(b.keys())] # remove oldest frame from stream buffer
else:
if not self._buffered or self._is_stream_buffer_owner:
e.set()
break # termination
if t is not None:
del p,q,t
except:
raise
finally:
if self._is_stream_buffer_owner:
self._video.delattribute('__stream_buffer') # cleanup, or force a reinitialization by passing the rebuffered=True to the primary iterator
def __getitem__(self, k):
"""Retrieve individual frame index - this is inefficient, use __iter__ instead"""
return self._video.preview(framenum=k) # this is inefficient
def write(self, im, flush=False):
"""Write individual frames to write stream"""
assert isinstance(im, vipy.image.Image)
if self._shape is None:
self._shape = im.shape()
assert im.channels() == 3, "RGB frames required"
self.__enter__()
assert self._write_pipe is not None, "Write stream cannot be initialized"
assert im.shape() == self._shape, "Shape cannot change during writing"
self._write_pipe.stdin.write(im.array().astype(np.uint8).tobytes())
if flush:
self._write_pipe.stdin.flush() # do we need this?
if isinstance(im, vipy.image.Scene) and len(im.objects()) > 0 and isinstance(self._video, vipy.video.Scene):
for obj in im.objects():
self._video.add(obj, frame=self._writeindex, rangecheck=False)
self._writeindex += 1 # assumes that the source image is at the appropriate frame rate for this video
def clip(self, n, m=1, continuous=False, tracks=True, activities=True, delay=0, ragged=False):
"""Stream clips of length n such that the yielded video clip contains frame(0+delay) to frame(n+delay), and next contains frame(m+delay) to frame(n+m+delay).
Usage examples:
```python
for vc in v.stream().clip(n=16, m=2):
# yields video vc with frames [0,16] from v
# then video vc with frames [2,18] from v
# ... finally video with frames [len(v)-n-1, len(v)-1]
```
Introducing a delay so that the clips start at a temporal offset from v
```python
for vc in v.stream().clip(n=8, m=3, delay=1):
# yields video vc with frames [1,9]
# then video vc with frames [4,12] ...
```
Args:
n: [int] the length of the clip in frames
m: [int] the stride between clips in frames
delay: [int] The temporal delay in frames for the clip, must be less than n and >= 0
continuous: [bool] if true, then yield None for the sequential frames not aligned with a stride so that a clip is yielded on every frame
activities: [bool] if false, then activities from the source video are not copied into the clip
tracks: [bool] if false, then tracks from the source video are not copied into the clip
Returns:
An iterator that yields `vipy.video.Video` objects each of length n with startframe += m, starting at frame=delay, such that each video contains the tracks and activities (if requested) for this clip sourced from the shared stream video.
.. note:: This iterator runs in a thread to help speed up fetching of frames for GPU I/O bound operations
"""
assert isinstance(n, int) and n>0, "Clip length must be a positive integer"
assert isinstance(m, int) and m>0, "Clip stride must be a positive integer"
assert isinstance(delay, int) and delay >= 0 and delay < n, "Clip delay must be a non-negative integer less than n"
assert not self._buffered or 3*n < self._bufsize, "increase buffered stream size (bufsize) from %d to >%d" % (self._bufsize, 3*n)
def _f_threadloop(v, streamiter, queue, event, ragged, m, n):
(frames, newframes) = ([], [])
for (k,im) in enumerate(streamiter()):
newframes.append(im)
if len(newframes) >= m and len(frames)+len(newframes) >= n:
# Use frameindex+1 so that we include (0,1), (1,2), (2,3), ... for n=2, m=1
# The delay shifts the clip +delay frames (1,2,3), (3,4,5), ... for n=3, m=2, delay=1
frames.extend(newframes)
(frames, newframes) = (frames[-n:], [])
queue.put( (v.clear().clone(shallow=True).fromframes(frames), k) ) # fromframes() triggers array copy of frames
elif continuous:
queue.put( (None, k) )
if ragged and len(newframes) > 0:
queue.put( (v.clear().clone(shallow=True).fromframes(newframes), k) ) # fromframes() triggers array copy of newframes
queue.put( (None, None) )
event.wait()
vc = self._video.clone(flushfilter=True).clear().nourl().nofilename()
q = queue.Queue(3) # warning: if this queuesize*n > buffersize, then there can be a deadlock
e = threading.Event()
t = threading.Thread(target=_f_threadloop, args=(vc, self.__iter__, q, e, ragged, m, n), daemon=True, name='vipy.video.Stream.clip')
t.start()
f_copy_annotations = lambda v, k, n: (v.activities([a.clone().offset(-(k-(n-1))).truncate(0,n-1) for (ak,a) in self._video.activities().items() if a.during_interval(k-(n-1), k, inclusive=False)] if activities else [])
.tracks([t.clone(k-(n-1), k).offset(-(k-(n-1))).truncate(0,n-1) for (tk,t) in self._video.tracks().items() if t.during_interval(k-(n-1), k)] if tracks else [])
if (v is not None and isinstance(v, vipy.video.Scene)) else v)
try:
while True:
# The queue can be filled with more expensive copies and clones to speed up iteration when waiting for GPU I/O
(v, k) = q.get()
q.task_done()
if k is None:
e.set()
break
else:
# This copy must be done sychronized at frame k with the current state of the annotations in the shared self._video
yield f_copy_annotations(v, k, len(v)) if v is not None else None
finally:
e.set() # thread cleanup on early exit
del q,t
def batch(self, n):
"""Stream batches of length n such that each batch contains frames [0, n-1], [n, 2n-1], ... Last batch will be ragged.
The primary use case for batch() is to provide a mechanism for parallel batch processing on a GPU.
```python
def myfunc(gen):
    for vb in gen:
        # process the batch vb (length n) in parallel by encoding on a GPU with batchsize=n
        for im in f_gpu(vb):
            yield im

for im_gpu in myfunc(vi.stream().batch(16)):
    print(im_gpu)
```
This will then yield the GPU batch-processed images im_gpu.
"""
return self.clip(n=n, m=n, continuous=False, ragged=True)
def frame(self, delay=0):
"""Stream individual frames of video with negative offset n frames to the stream head. If delay=30, this will return a frame 30 frames ago"""
assert isinstance(delay, int) and delay >= 0, "Frame delay must be positive integer"
n = -delay
frames = []
i = 0
for (k,im) in enumerate(self):
frames.append( (k,im) )
(kout, imout) = frames[0]
frames.pop(0) if len(frames) > abs(n) else None
i = k
yield self._video.frame(kout, imout.array()) if len(frames) == delay else None # refetch for track interpolation
class Video(object):
""" vipy.video.Video class
The vipy.video class provides a fluent, lazy interface for representing, transforming and visualizing videos.
The following constructors are supported:
```python
vid = vipy.video.Video(filename='/path/to/video.ext')
```
Valid video extensions are those that are supported by ffmpeg ['.avi', '.mp4', '.mov', '.wmv', '.mpg', '.mkv', '.webm'].
```python
vid = vipy.video.Video(url='https://www.youtube.com/watch?v=MrIN959JuV8')
vid = vipy.video.Video(url='http://path/to/video.ext', filename='/path/to/video.ext')
```
Youtube URLs are downloaded to a temporary filename, retrievable as vid.download().filename(). If the environment
variable 'VIPY_CACHE' is defined, then videos are saved to this directory rather than the system temporary directory.
If a filename is provided to the constructor, then that filename will be used instead of a temp or cached filename.
URLs can be defined as an absolute URL to a video file, or to a site supported by 'youtube-dl' (https://ytdl-org.github.io/youtube-dl/supportedsites.html)
```python
vid = vipy.video.Video(url='s3://BUCKET.s3.amazonaws.com/PATH/video.ext')
```
If you set the environment variables VIPY_AWS_ACCESS_KEY_ID and VIPY_AWS_SECRET_ACCESS_KEY, then this will download videos directly from S3 using boto3 and store in VIPY_CACHE.
Note that the URL protocol should be 's3' and not 'http' to enable keyed downloads.
```python
vid = vipy.video.Video(array=array, colorspace='rgb')
```
The input 'array' is an NxHxWx3 numpy array corresponding to an N-length list of HxWx3 uint8 numpy arrays, where each array is a single frame of the pre-loaded video.
Note that some video transformations are only available prior to load(), and the array() is assumed immutable after load().
```python
frames = [im for im in vipy.video.RandomVideo()]
vid = vipy.video.Video(frames=frames)
```
The input can be an RTSP video stream. Note that streaming is most efficiently performed using `vipy.video.Scene`. The URL must contain the 'rtsp://' url scheme.
You can experiment with this using the free Periscope H.264 RTSP App (https://apps.apple.com/us/app/periscope-hd-h-264-rtsp-cam/id1095600218)
```python
vipy.video.Scene(url='rtsp://127.0.0.1:8554/live.sdp').show()
for im in vipy.video.Scene(url='rtsp://127.0.0.1:8554/live.sdp').stream():
print(im)
```
See also 'pip install heyvi'
Args:
filename: [str] The path to a video file.
url: [str] The URL to a video file. If filename is not provided, then a random filename is assigned in VIPY_CACHE on download
framerate: [float] The framerate of the video file. This is required. You can introspect this using ffprobe.
attributes: [dict] A user supplied dictionary of metadata about this video.
colorspace: [str] Must be in ['rgb', 'float']
array: [numpy] An NxHxWxC numpy array for N frames each HxWxC shape
startframe: [int] A start frame to clip the video
endframe: [int] An end frame to clip the video
startsec: [float] A start time in seconds to clip the video (this requires setting framerate)
endsec: [float] An end time in seconds to clip the video (this requires setting framerate)
frames: [list of `vipy.image.Image`] A list of frames in the video
probeshape: [bool] If true, then probe the shape of the video from ffprobe to avoid an explicit preview later. This can speed up loading in some circumstances.
"""
def __init__(self, filename=None, url=None, framerate=30.0, attributes=None, array=None, colorspace=None, startframe=None, endframe=None, startsec=None, endsec=None, frames=None, probeshape=False):
self._url = None
self._filename = None
self._array = None
self._colorspace = None
self._ffmpeg = None
self._framerate = None
self.attributes = attributes if attributes is not None else {}
assert isinstance(self.attributes, dict), "Attributes must be a python dictionary"
assert filename is not None or url is not None or array is not None or frames is not None, 'Invalid constructor - Requires "filename", "url" or "array" or "frames"'
if (url is None and isurl(filename)) or (url == filename and (isRTSPurl(url) or isRTMPurl(url))):
(url, filename) = (filename, None) # correct for common typo vipy.video.Video('rtsp://....')
assert not isurl(filename)
# FFMPEG installed?
if not has_ffmpeg:
warnings.warn('"ffmpeg" executable not found on path, this is required for vipy.video - Install from http://ffmpeg.org/download.html')
# Constructor clips
startframe = startframe if startframe is not None else (0 if endframe is not None else startframe)
assert (startsec is not None and endsec is not None) or (startsec is None and endsec is None), "Invalid input - (startsec,endsec) are both required"
(self._startframe, self._endframe) = (None, None) # __repr__ only
(self._startsec, self._endsec) = (None, None) # __repr__ only (legacy, no longer used)
# Input filenames
if url is not None:
assert isurl(url), 'Invalid URL "%s" ' % url
self._url = url
if filename is not None:
self._filename = os.path.normpath(os.path.expanduser(filename))
elif self._url is not None:
if isS3url(self._url):
self._filename = totempdir(self._url) # Preserve S3 Object ID
elif isRTSPurl(self._url):
# https://ffmpeg.org/ffmpeg-protocols.html#rtsp
self._filename = self._url
elif isRTMPurl(self._url):
# https://ffmpeg.org/ffmpeg-protocols.html#rtmp
self._filename = self._url
elif isvideourl(self._url):
self._filename = templike(self._url)
elif isyoutubeurl(self._url):
self._filename = os.path.join(tempdir(), '%s' % (self._url.split('?')[1].split('&')[0] if '?' in self._url else self._url.split('/')[-1]))
else:
self._filename = totempdir(self._url)
if vipy.globals.cache() is not None and self._filename is not None and not self.isstreaming():
self._filename = os.path.join(remkdir(vipy.globals.cache()), filetail(self._filename))
# Initial video shape: useful to avoid preview()
if not self.isstreaming():
self._ffmpeg = ffmpeg.input(self.filename())
elif isRTSPurl(url):
self._ffmpeg = ffmpeg.input(self.filename(), rtsp_flags='prefer_tcp')
elif isRTMPurl(url):
rtmp = urlparse(url) # rtmp://HOST:PORT/PATH?listen
self._ffmpeg = ffmpeg.input(urlunparse( (rtmp.scheme,rtmp.netloc,rtmp.path,None,None,None) ), listen='1') if 'listen' in rtmp.query else ffmpeg.input(self.filename())
if probeshape and (frames is None and array is None) and has_ffprobe and self.hasfilename():
self.shape(self.probeshape())
else:
self._shape = None # preview() on shape()
# Video filter chain
if framerate is not None:
if array is None and frames is None:
self.framerate(framerate)
self._framerate = framerate
if startframe is not None:
self.clip(startframe, endframe)
if startsec is not None:
# WARNING: if the user does not supply the correct framerate for the video, then this will be wrong since these are converted to frames
self.clip(int(round(startsec*self.framerate())), int(round(endsec*self.framerate())) if endsec is not None else None)
# Array input
assert not (array is not None and frames is not None)
if array is not None:
self.array(array)
self.colorspace(colorspace)
elif frames is not None and (isinstance(frames, list) or isinstance(frames, tuple)) and all([isinstance(im, vipy.image.Image) for im in frames]):
self.fromframes(frames)
elif frames is not None and (isinstance(frames, list) or isinstance(frames, tuple)) and all([isinstance(im, str) and os.path.exists(im) for im in frames]):
self.fromframes([vipy.image.Image(filename=f) for f in frames])
elif frames is not None and (isinstance(frames, str) and os.path.isdir(frames)):
self.fromdirectory(frames)
@classmethod
def cast(cls, v):
"""Cast a conformal video object to a `vipy.video.Video` object.
This is useful for downcasting superclasses.
```python
vs = vipy.video.RandomScene()
v = vipy.video.Video.cast(vs)
```
"""
assert isinstance(v, vipy.video.Video), "Invalid input - must be derived from vipy.video.Video"
v.__class__ = vipy.video.Video
return v
@classmethod
def from_json(cls, s):
"""Import a json string as a `vipy.video.Video` object.
This will perform a round trip from a video to json and back to a video object.
This same operation is used for serialization of all vipy objects to JSON for storage.
```python
v = vipy.video.Video.from_json(vipy.video.RandomVideo().json())
```
"""
d = json.loads(s) if not isinstance(s, dict) else s
d = {k.lstrip('_'):v for (k,v) in d.items()} # prettyjson (remove "_" prefix to attributes)
v = cls(filename=d['filename'],
url=d['url'],
framerate=d['framerate'],
array=np.array(d['array']) if d['array'] is not None else None,
colorspace=d['colorspace'],
attributes=d['attributes'],
startframe=d['startframe'],
endframe=d['endframe'],
startsec=d['startsec'],
endsec=d['endsec'])
v._ffmpeg = v._from_ffmpeg_commandline(d['ffmpeg'])
return v.filename(d['filename']) if d['filename'] is not None else v.nofilename()
def __repr__(self):
strlist = []
if self.isloaded():
strlist.append("height=%d, width=%d, frames=%d, color=%s" % (self.height(), self.width(), len(self), self.colorspace()))
if self.filename() is not None:
strlist.append('filename="%s"' % self.filename())
if self.hasurl():
strlist.append('url="%s"' % self.url())
if not self.isloaded() and self._startframe is not None and self._endframe is not None:
strlist.append('clip=(%d,%d)' % (self._startframe, self._endframe))
if not self.isloaded() and self._startframe is not None and self._endframe is None:
strlist.append('clip=(%d,)' % (self._startframe))
if self._framerate is not None:
strlist.append('fps=%1.1f' % float(self._framerate))
return str('<vipy.video: %s>' % (', '.join(strlist)))
def __len__(self):
"""Number of frames in the video if loaded, else zero.
.. note:: Do not automatically trigger a load, since this can interact in unexpected ways with other tools that depend on fast __len__()
"""
if not self.isloaded():
warnings.warn('Load() video to see number of frames - Returning zero') # should this just throw an exception?
return len(self.array()) if self.isloaded() else 0
def __getitem__(self, k):
"""Alias for `vipy.video.Video.frame`"""
return self.frame(k)
def metadata(self, k=None):
"""Return a dictionary of metadata about this video.
Args:
k [str]: If provided, return just the specified key of the attributes dictionary, otherwise return the attributes dictionary
Returns:
The 'attributes' dictionary, or just the value for the provided key k if provided
"""
return self.attributes if k is None else self.attributes[k]
def sanitize(self):
"""Remove all private keys from the attributes dictionary.
The attributes dictionary is useful storage for arbitrary (key,value) pairs. However, this storage may contain sensitive information that should be scrubbed from the video before serialization. As a general rule, any key that is of the form '__keyname' prepended by two underscores is a private key. This is analogous to private or reserved attributes in the python language. Users should reserve these keynames for those keys that should be sanitized and removed before any serialization of this object.
```python
assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False
```
"""
if self._has_private_attribute():
self.attributes = {k:v for (k,v) in self.attributes.items() if not k.startswith('__')}
return self
def videoid(self, newid=None):
"""Return a unique video identifier for this video, as specified in the 'video_id' attribute, or by SHA1 hash of the `vipy.video.Video.filename` and `vipy.video.Video.url`.
Args:
newid: [str] If not None, then update the video_id as newid.
Returns:
The video ID if newid=None else self
.. note::
- If the video filename changes (e.g. from transformation), and video_id is not set in self.attributes, then the video ID will change.
- If a video does not have a filename or URL or a video ID in the attributes, then this will return None
- To preserve a video ID independent of transformations, set self.setattribute('video_id', ${MY_ID}), or pass in newid
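For example, a short sketch of setting an explicit ID that survives transformations:
```python
v = vipy.video.RandomVideo()
assert v.videoid('myid').videoid() == 'myid'
```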
"""
if newid is not None:
self.setattribute('video_id', newid)
return self
else:
return self.attributes['video_id'] if 'video_id' in self.attributes else (hashlib.sha1(str(str(self.filename())+str(self.url())).encode("UTF-8")).hexdigest() if (self.filename() is not None or self.url() is not None) else None)
def frame(self, k=0, img=None, t=None):
"""Return the kth frame as an `vipy.image Image` object"""
k = int(self.framerate()*t) if t is not None else k
assert isinstance(k, int) and k>=0, "Frame index must be non-negative integer"
return Image(array=img if img is not None else (self._array[k] if self.isloaded() else self.preview(k).array()), colorspace=self.colorspace())
def __iter__(self):
"""Iterate over video, yielding read only frames.
```python
for im in vipy.video.RandomScene():
print(im)
```
"""
return self.stream().__iter__()
def store(self):
"""Store the current video file as an attribute of this object.
Useful for archiving an object to be fully self contained without any external references.
```python
v == v.store().restore(v.filename())
```
.. note::
-Remove this stored video using unstore()
-Unpack this stored video and set up the video chains using restore()
-This method is more efficient than load() followed by pkl(), as it stores the encoded video as a byte string.
-Useful for creating a single self contained object for distributed processing.
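A hedged round-trip sketch, assuming the video already has an encoded file on disk (the output path is hypothetical):
```python
v.store()                       # cache the encoded bytes in v.attributes['__video__']
v.restore('/path/to/copy.mp4')  # write the cached bytes out and set the filename
v.unstore()                     # drop the cached bytes
```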
"""
assert self.hasfilename(), "Video file not found. Try saveas() first to create a video file to store."
with open(self.filename(), 'rb') as f:
self.attributes['__video__'] = f.read()
return self
def unstore(self):
"""Delete the currently stored video from `vipy.video.Video.store"""
return self.delattribute('__video__')
def restore(self, filename):
"""Save the currently stored video as set using `vipy.video.Video.store` to filename, and set up filename"""
assert self.hasattribute('__video__'), "Video not stored"
with open(filename, 'wb') as f:
f.write(self.attributes['__video__'])
return self.filename(filename)
@classmethod
def concatenate(cls, videos, outfile, framerate=30, youtube_chapters=None):
"""Temporally concatenate a sequence of videos into a single video stored in outfile.
```python
(v1, v2, v3) = (vipy.video.RandomVideo(128,128,32), vipy.video.RandomVideo(128,128,32), vipy.video.RandomVideo(128,128,32))
vc = vipy.video.Video.concatenate((v1, v2, v3), 'concatenated.mp4', youtube_chapters=lambda v: v.category())
```
In this example, vc will point to concatenated.mp4, which will contain (v1,v2,v3) concatenated temporally.
Args:
videos: a single video or an iterable of videos of type `vipy.video.Video` or an iterable of video files
outfile: the output filename to store the concatenation.
youtube_chapters: [bool, callable]: If true, output a string that can be used to define the start and end times of chapters if this video is uploaded to youtube. The string output should be copied to the youtube video description in order to enable chapters on playback. This argument will default to the string representation of the video, but you may also pass a callable of the form: 'youtube_chapters=lambda v: str(v)' which will output the provided string for each video chapter. A useful lambda is 'youtube_chapters=lambda v: v.category()'
framerate: [float]: The output frame rate of outfile
Returns:
A `vipy.video.Video` object with filename()=outfile, such that outfile contains the temporal concatenation of pixels in (self, videos).
.. note::
- self will not be modified, this will return a new `vipy.video.Video` object.
- All videos must be the same shape(). If the videos are different shapes, you must pad them to a common size equal to self.shape(). Try `vipy.video.Video.zeropadlike`.
- The output video will be at the framerate of self.framerate().
- if you want to concatenate annotations, call `vipy.video.Scene.annotate` first on the videos to save the annotations into the pixels, then concatenate.
"""
assert len(tolist(videos))>0 and (all([isinstance(v, vipy.video.Video) for v in tolist(videos)]) or all([os.path.exists(f) and vipy.util.isvideofile(f) for f in tolist(videos)]))
vi = tolist(videos) if all([isinstance(v, vipy.video.Video) for v in tolist(videos)]) else [cls(filename=f) for f in tolist(videos)]
assert all([vij.shape() == vik.shape() for vij in vi for vik in vi]), "Video shapes must all be the same, try padding"
vo = cls(filename=outfile, framerate=vi[0].framerate())
with vo.stream(overwrite=True) as s:
for v in vi:
for im in v.clone().framerate(framerate).stream():
s.write(im)
if youtube_chapters is not None:
f = youtube_chapters if callable(youtube_chapters) else lambda v: str(v).replace('<','').replace('>','') # angle brackets not allowed
print('[vipy.video.concatenate]: Copy the following into the video Description after uploading the videofile "%s" to YouTube to enable chapters on playback.\n' % outfile)
print('\n'.join(['%s %s' % (vipy.util.seconds_to_MMSS_colon_notation(int(s)), str(f(v))) for (s,v) in zip(np.cumsum([0] + [v.duration() for v in vi][:-1]), vi)])); print('\n')
if any([v.duration() < 10 for v in vi]):
warnings.warn('YouTube chapters must be a minimum duration of 10 seconds')
return vo
def stream(self, write=False, overwrite=False, queuesize=512, bitrate=None, buffered=False, rebuffered=False, bufsize=256):
"""Iterator to yield groups of frames streaming from video.
A video stream is a real time iterator to read or write from a video. Streams are useful to group together frames into clips that are operated on as a group.
The following use cases are supported:
```python
v = vipy.video.RandomScene()
```
Stream individual video frames lagged by 10 frames and 20 frames
```python
for (im1, im2) in zip(v.stream().frame(delay=10), v.stream().frame(delay=20)):
print(im1, im2)
```
Stream overlapping clips such that each clip is a video n=16 frames long and starts at frame i, and the next clip is n=16 frames long and starts at frame i=i+m
```python
for vc in v.stream().clip(n=16, m=4):
print(vc)
```
Stream non-overlapping batches of frames such that each clip is a video of length n and starts at frame i, and the next clip is length n and starts at frame i+n
```python
for vb in v.stream().batch(n=16):
print(vb)
```
Create a write stream to incrementally add frames to long video.
```python
vi = vipy.video.Video(filename='/path/to/output.mp4')
vo = vipy.video.Video(filename='/path/to/input.mp4')
with vo.stream(write=True) as s:
for im in vi.stream():
s.write(im) # manipulate pixels of im, if desired
```
Create a 480p YouTube live stream from an RTSP camera at 5Hz
```python
vo = vipy.video.Scene(url='rtmp://a.rtmp.youtube.com/live2/$SECRET_STREAM_KEY')
vi = vipy.video.Scene(url='rtsp://URL').framerate(5)
with vo.framerate(5).stream(write=True, bitrate='1000k') as s:
for im in vi.framerate(5).resize(cols=854, rows=480):
s.write(im)
```
Args:
write: [bool] If true, create a write stream
overwrite: [bool] If true, and the video output filename already exists, overwrite it
queuesize: [int] The maximum queue size for the ffmpeg pipe thread in the primary iterator. The queue size is the maximum number of pre-fetched frames from the ffmpeg pipe. This should be big enough that you are never waiting for queue fills
bitrate: [str] The ffmpeg bitrate of the output encoder for writing, written like '2000k'
bufsize: [int] The maximum size of the stream buffer in frames. The stream buffer length should be big enough so that all iterators can yield before deleting old frames
Returns:
A Stream object
.. note:: Using this iterator may affect PDB debugging due to stdout/stdin redirection. Use ipdb instead.
"""
return Stream(self, queuesize=queuesize, write=write, overwrite=overwrite, bitrate=bitrate, buffered=buffered, rebuffered=rebuffered, bufsize=bufsize) # do not clone
def clear(self):
"""no-op for `vipy.video.Video` object, used only for `vipy.video.Scene`"""
return self
def bytes(self):
"""Return a bytes representation of the video file"""
assert self.hasfilename(), "Invalid filename"
with open(self.filename(), 'rb') as f:
data = io.BytesIO(f.read())
return str(data.read()).encode('UTF-8')
def frames(self):
"""Alias for __iter__()"""
return self.__iter__()
def framelist(self):
return list(self.frames())
def _update_ffmpeg_seek(self, timestamp_in_seconds=0, offset=0):
if timestamp_in_seconds == 0 and offset == 0:
return self
nodes = ffmpeg.nodes.get_stream_spec_nodes(self._ffmpeg)
sorted_nodes, outgoing_edge_maps = ffmpeg.dag.topo_sort(nodes)
for n in sorted_nodes:
if 'input' == n.__dict__['name']:
if 'ss' not in n.__dict__['kwargs']:
n.__dict__['kwargs']['ss'] = 0
if timestamp_in_seconds == 0:
n.__dict__['kwargs']['ss'] = n.__dict__['kwargs']['ss'] + offset
else:
n.__dict__['kwargs']['ss'] = timestamp_in_seconds + offset
return self
raise ValueError('invalid ffmpeg argument "%s" -> "%s"' % ('ss', timestamp_in_seconds))
def _update_ffmpeg(self, argname, argval, node_name=None):
"""Update the ffmpeg filter chain to overwrite the (argname, argval) elements.
Useful for fine-tuning a filter chain without rewiring the whole thing.
"""
nodes = ffmpeg.nodes.get_stream_spec_nodes(self._ffmpeg)
sorted_nodes, outgoing_edge_maps = ffmpeg.dag.topo_sort(nodes)
for n in sorted_nodes:
if argname in n.__dict__['kwargs'] or node_name == n.__dict__['name']:
n.__dict__['kwargs'][argname] = argval
return self
raise ValueError('invalid ffmpeg argument "%s" -> "%s"' % (argname, argval))
def _ffmpeg_commandline(self, f=None):
"""Return the ffmpeg command line string that will be used to process the video"""
cmd = f.compile() if f is not None else self._ffmpeg.output('dummyfile').compile()
for (k,c) in enumerate(cmd):
if c is None:
cmd[k] = str(c)
elif 'filter' in c:
cmd[k+1] = '"%s"' % str(cmd[k+1])
elif 'map' in c:
cmd[k+1] = '"%s"' % str(cmd[k+1])
return str(' ').join(cmd)
def commandline(self):
"""Return the equivalent ffmpeg command line string that will be used to transcode the video.
This is useful for introspecting the complex filter chain that will be used to process the video. You can try to run this command line yourself for debugging purposes, by replacing 'dummyfile' with an appropriately named output file.
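A short sketch (assuming `v` is a `vipy.video.Video` backed by a file):
```python
print(v.framerate(5).commandline())  # the fps filter appears in the '-filter_complex' argument
```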
"""
return self._ffmpeg_commandline()
def _from_ffmpeg_commandline(self, cmd, strict=False):
"""Convert the ffmpeg command line string (e.g. from `vipy.video.Video.commandline`) to the corresponding ffmpeg-python filter chain and update self"""
args = copy.copy(cmd).replace(str(self.filename()), 'FILENAME').split(' ') # filename may contain spaces
assert args[0] == 'ffmpeg', "Invalid FFMPEG command line '%s'" % cmd
assert args[1] == '-i' or (args[3] == '-i' and (args[1] == '-ss' or args[1] == '-rtsp_flags' or args[1] == '-listen')), "Invalid FFMPEG command line '%s'" % cmd
assert args[-1] == 'dummyfile', "Invalid FFMPEG command line '%s'" % cmd
assert len(args) >= 4, "Invalid FFMPEG command line '%s'" % cmd
if args[1] == '-ss':
timestamp_in_seconds = float(args[2])
timestamp_in_seconds = int(timestamp_in_seconds) if timestamp_in_seconds == 0 else timestamp_in_seconds # 0.0 -> 0
args = [args[0]] + args[3:]
f = ffmpeg.input(args[2].replace('FILENAME', self.filename()), ss=timestamp_in_seconds) # restore filename, set offset time
self._startframe = int(round(timestamp_in_seconds*self.framerate())) # necessary for clip() and __repr__
elif args[1] == '-rtsp_flags':
rtsp_flag = str(args[2])
args = [args[0]] + args[3:]
f = ffmpeg.input(args[2].replace('FILENAME', self.filename()), rtsp_flags=rtsp_flag) # restore filename, set rtsp flag
elif args[1] == '-listen':
listenval = str(args[2])
args = [args[0]] + args[3:]
f = ffmpeg.input(args[2].replace('FILENAME', self.filename()), listen=listenval) # restore filename, set RTMP listener
else:
f = ffmpeg.input(args[2].replace('FILENAME', self.filename())) # restore filename
if len(args) > 4:
assert args[3] == '-filter_complex', "Invalid FFMPEG command line '%s'" % cmd
assert args[4][0] == '"' and args[4][-1] == '"', "Invalid FFMPEG command line '%s'" % cmd
filterargs = args[4][1:-1].split(';')
for a in filterargs:
assert a.count(']') == 2 and a.count('[') == 2
kwstr = a.split(']', maxsplit=1)[1].split('[', maxsplit=1)[0]
if kwstr.count('=') == 0:
f = f.filter(kwstr)
else:
(a, kw) = ([], {})
(filtername, kwstr) = kwstr.split('=', maxsplit=1)
for s in kwstr.split(':'):
if s.count('=') > 0:
(k,v) = s.split('=')
kw[k] = v
else:
a.append(s)
if 'end' in kw:
self._endframe = (self._startframe if self._startframe is not None else 0) + int(round(float(kw['end'])*self.framerate())) # for __repr__
if 'start' in kw:
pass
if 'start_frame' in kw or 'end_frame' in kw:
f = f.setpts('PTS-STARTPTS') # reset timestamp to 0 before trim filter in seconds
if 'end_frame' in kw:
self._endframe = (self._startframe if self._startframe is not None else 0) + int(kw['end_frame']) # for __repr__
kw['end'] = int(kw['end_frame'])/self.framerate() # convert end_frame to end (legacy)
del kw['end_frame'] # use only end and not end frame
if 'start_frame' in kw:
self._startframe = (self._startframe if self._startframe is not None else 0) + int(kw['start_frame']) # for __repr__
kw['start'] = int(kw['start_frame'])/self.framerate() # convert start_frame to start (legacy)
del kw['start_frame'] # use only start and not start_frame
f = f.filter(filtername, *a, **kw)
if strict:
assert self._ffmpeg_commandline(f.output('dummyfile')) == cmd, "FFMPEG command line '%s' != '%s'" % (self._ffmpeg_commandline(f.output('dummyfile')), cmd)
return f
def _isdirty(self):
"""Has the FFMPEG filter chain been modified from the default? If so, then ffplay() on the video file will be different from self.load().play()"""
return '-filter_complex' in self._ffmpeg_commandline()
def probeshape(self):
"""Return the (height, width) of underlying video file as determined from ffprobe
.. warning:: this does not take into account any applied ffmpeg filters. The shape will be the (height, width) of the underlying video file.
"""
p = self.probe()
assert len(p['streams']) > 0
return (p['streams'][0]['height'], p['streams'][0]['width'])
def duration_in_seconds_of_videofile(self):
"""Return video duration of the source filename (NOT the filter chain) in seconds, requires ffprobe. Fetch once and cache.
.. note:: This is the duration of the source video and NOT the duration of the filter chain. If you load(), the duration may differ depending on clip() or framerate() directives.
"""
filehash = hashlib.md5(str(self.downloadif().filename()).encode()).hexdigest()
if self.hasattribute('__duration_in_seconds_of_videofile') and self.attributes['__duration_in_seconds_of_videofile']['filehash'] == filehash:
return self.attributes['__duration_in_seconds_of_videofile']['duration']
else:
d = float(self.probe()['format']['duration'])
self.attributes['__duration_in_seconds_of_videofile'] = {'duration':d, 'filehash':filehash} # for next time, private attribute
return d
def duration_in_frames_of_videofile(self):
"""Return video duration of the source video file (NOT the filter chain) in frames, requires ffprobe.
.. note:: This is the duration of the source video and NOT the duration of the filter chain. If you load(), the duration may differ depending on clip() or framerate() directives.
"""
return int(np.floor(self.duration_in_seconds_of_videofile()*self.framerate_of_videofile()))
def duration(self, frames=None, seconds=None, minutes=None):
"""Return a video clipped with frame indexes between (0, frames) or (0,seconds*self.framerate()) or (0,minutes*60*self.framerate(). Return duration in seconds if no arguments are provided."""
if frames is None and seconds is None and minutes is None:
return self.duration_in_seconds_of_videofile() if not self.isloaded() else (len(self) / self.framerate())
assert frames is not None or seconds is not None or minutes is not None
frames = frames if frames is not None else ((int(seconds*self.framerate()) if seconds is not None else 0) + (int(minutes*60*self.framerate()) if minutes is not None else 0))
return self.clip(0, frames)
def duration_in_frames(self):
"""Return the duration of the video filter chain in frames, equal to round(self.duration()*self.framerate()). Requires a probe() of the video to get duration"""
return int(round(self.duration()*self.framerate()))
def framerate_of_videofile(self):
"""Return video framerate in frames per second of the source video file (NOT the filter chain), requires ffprobe.
"""
p = self.probe()
assert 'streams' in p and len(p['streams']) > 0
fps = p['streams'][0]['avg_frame_rate']
return float(fps) if '/' not in fps else (float(fps.split('/')[0]) / float(fps.split('/')[1])) # fps='30/1' or fps='30.0'
def resolution_of_videofile(self):
"""Return video resolution in (height, width) in pixels (NOT the filter chain), requires ffprobe.
"""
p = self.probe()
assert 'streams' in p and len(p['streams']) > 0
(H,W) = (p['streams'][0]['height'], p['streams'][0]['width']) # (height, width) in pixels
return (W,H) if ('tags' in p['streams'][0] and 'rotate' in p['streams'][0]['tags'] and p['streams'][0]['tags']['rotate'] in ['90','270']) else (H,W)
def probe(self, **kwargs):
"""Run ffprobe on the filename and return the result as a dictionary
Args:
Any keyword arguments supported by python-ffmpeg probe() - these are passed in as-is
- for flags, use flag_name=None (e.g., show_frames=None) so that ffmpeg.probe() handles them correctly
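A short sketch of both call styles:
```python
d = v.probe()                  # container and stream metadata as a dict
f = v.probe(show_frames=None)  # per-frame metadata, passing the flag as flag_name=None
```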
"""
if not has_ffprobe:
raise ValueError('"ffprobe" executable not found on path, this is optional for vipy.video - Install from http://ffmpeg.org/download.html')
assert self.downloadif().hasfilename(), "Invalid video file '%s' for ffprobe" % self.filename()
return ffmpeg.probe(self.filename(), **kwargs)
def frame_meta(self, k=None):
"""Return the frame metadata of the underlying video file using ffprobe for all frames as a list of dicts, each list element corresponding to a frame. This is useful for extracting frame types (e.g. i-frames).
Args:
k [int]: Return only the frame metadata for frame index k (relative to framerate of source videofile, not filter chain)
Returns:
a list of metadata dicts (one per frame) or a single dict for the requested frame.
.. note::
- This will return a large amount of metadata for the entire source video (not the FFMPEG filter chain), use with caution.
- To get frame metadata for a filter chain use vipy.video.Video.savetemp().frame_meta(), which will save the video to a temporary file prior to extracting frame metadata
"""
d = self.probe(show_frames=None).get('frames')
return d if k is None else d[k]
def metaframe(self, k=None):
"""Alias for `vipy.video.Video.frame_meta`"""
return self.frame_meta(k)
def iframes(self):
"""Return a list of i-frame indexes (e.g. intra-frame, a video frame that is independent from other frames for decoding) in this video file.
.. note::
- To return the i-frame indexes for the current filter chain use self.saveas().iframes() to save to a temporary file prior to i-frame index extraction.
- To extract the i-frame itself, use [self.frame(k) for k in self.iframes()]
"""
return [k for (k,d) in enumerate(self.metaframe()) if d['pict_type'] == 'I']
def print(self, prefix='', verbose=True, sleep=None):
"""Print the representation of the video
This is useful for debugging in long fluent chains. Sleep is useful for adding in a delay for distributed processing.
Args:
prefix: prepend a string prefix to the video __repr__ when printing. Useful for logging.
verbose: Print out the video __repr__. Set verbose=False to just sleep
sleep: Integer number of seconds to sleep before returning
fluent [bool]: If true, return self else return None. This is useful for terminating long fluent chains in lambdas that return None
Returns:
The video object after sleeping
"""
if verbose:
print(prefix+self.__repr__())
if sleep is not None:
assert isinstance(sleep, int) and sleep > 0, "Sleep must be a positive integer number of seconds"
time.sleep(sleep)
return self
def printif(self, b, prefix='', verbose=True, sleep=None):
"""Call `vipy.video.Video.print` if b=True. Useful for fluent chains to print periodically."""
assert isinstance(b, bool)
return self.print(prefix=prefix, verbose=b, sleep=sleep)
def __array__(self):
"""Called on np.array(self) for custom array container, (requires numpy >=1.16)"""
return self.numpy()
def dict(self):
"""Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding."""
return self.json(encode=False)
def __json__(self):
"""Serialization method for json package"""
return self.json(encode=True)
def json(self, encode=True):
"""Return a json representation of the video.
Args:
encode: If true, return a JSON encoded string using json.dumps
Returns:
A JSON encoded string if encode=True, else returns a dictionary object
.. note:: If the video is loaded, then the JSON will not include the pixels. Try using `vipy.video.Video.store` to serialize videos, or call `vipy.video.Video.flush` first.
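A hedged sketch of a serialization round trip (assuming `v` references a video file rather than loaded pixels):
```python
s = v.json()                        # JSON string; the pixel buffer is never serialized
v2 = vipy.video.Video.from_json(s)  # reconstruct the video and its ffmpeg filter chain
```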
"""
if self.isloaded():
warnings.warn("JSON serialization of loaded video is inefficient. Try store()/restore()/unstore() instead to serialize videos as standalone objects efficiently, or flush() any loaded videos prior to serialization to quiet this warning.")
d = {'filename':self._filename,
'url':self._url,
'framerate':self._framerate,
'array':None,
'colorspace':self._colorspace,
'attributes':self.attributes,
'startframe':self._startframe,
'endframe':self._endframe,
'endsec':self._endsec,
'startsec':self._startsec,
'ffmpeg':self._ffmpeg_commandline()}
return json.dumps(d) if encode else d
def take(self, n):
"""Return n frames from the clip uniformly spaced as numpy array
Args:
n: Integer number of uniformly spaced frames to return
Returns:
A numpy array of shape (n,H,W,C)
.. warning:: This assumes that the entire video is loaded into memory (e.g. call `vipy.video.Video.load`). Use with caution.
"""
assert self.isloaded(), "load() is required before take()"
dt = int(np.round(len(self._array) / float(n))) # stride
return self._array[::dt][0:n]
def framerate(self, fps=None):
"""Change the input framerate for the video and update frame indexes for all annotations
Args:
fps: [Float] frames per second to process the underlying video
Returns:
If fps is None, return the current framerate, otherwise set the framerate to fps
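For example:
```python
v = v.framerate(5)  # decode the underlying file at 5 Hz by updating the fps filter
assert v.framerate() == 5
```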
"""
if fps is None:
return self._framerate
elif float(fps) == self._framerate:
return self
else:
assert not self.isloaded(), "Filters can only be applied prior to load()"
if 'fps=' in self._ffmpeg_commandline():
self._update_ffmpeg('fps', float(fps)) # replace fps filter, do not add to it
else:
self._ffmpeg = self._ffmpeg.filter('fps', fps=float(fps), round='up') # create fps filter first time
# if '-ss' in self._ffmpeg_commandline():
# No change is needed here. The seek is in seconds and is independent of the framerate
# if 'trim' in self._ffmpeg_commandline():
# No change is needed here. The trim is in units of seconds which is independent of the framerate
self._framerate = float(fps)
return self
def colorspace(self, colorspace=None):
"""Return or set the colorspace as ['rgb', 'bgr', 'lum', 'float']. This will not change pixels, only the colorspace interpretation of pixels."""
if colorspace is None:
return self._colorspace
elif self.isloaded():
assert str(colorspace).lower() in ['rgb', 'bgr', 'lum', 'float']
if self.array().dtype == np.float32:
assert str(colorspace).lower() in ['float']
elif self.array().dtype == np.uint8:
assert str(colorspace).lower() in ['rgb', 'bgr', 'lum']
if str(colorspace).lower() in ['lum']:
assert self.channels() == 1, "Luminance colorspace must be one channel uint8"
elif str(colorspace).lower() in ['rgb', 'bgr']:
assert self.channels() == 3, "RGB or BGR colorspace must be three channel uint8"
else:
raise ValueError('Invalid array() type "%s" - only np.float32 or np.uint8 allowed' % str(self.array().dtype))
self._colorspace = str(colorspace).lower()
return self
def nourl(self):
"""Remove the `vipy.video.Video.url` from the video"""
(self._url, self._urluser, self._urlpassword, self._urlsha1) = (None, None, None, None)
return self
def url(self, url=None, username=None, password=None, sha1=None):
"""Video URL and URL download properties"""
if url is not None:
self._url = url # note that this does not change anything else, better to use the constructor for this
if url is not None and (isRTSPurl(url) or isRTMPurl(url)):
self.filename(self._url)
if username is not None:
self._urluser = username # basic authentication
if password is not None:
self._urlpassword = password # basic authentication
if sha1 is not None:
self._urlsha1 = sha1 # file integrity
if url is None and username is None and password is None and sha1 is None:
return self._url
else:
return self
def isloaded(self):
"""Return True if the video has been loaded"""
return self._array is not None
def isloadable(self, flush=True):
"""Return True if the video can be loaded successfully.
This is useful for filtering bad videos or filtering videos that cannot be loaded using your current FFMPEG version.
Args:
flush: [bool] If true, flush the video after it loads. This will clear the video pixel buffer
Returns:
True if load() can be called without FFMPEG exception.
If flush=False, then self will contain the loaded video, which is helpful to avoid load() twice in some conditions
.. warning:: This requires loading and flushing the video. This is an expensive operation when performed on many videos and may result in out of memory conditions with long videos. Use with caution! Try `vipy.video.Video.canload` to test if a single frame can be loaded as a less expensive alternative.
"""
if not self.isloaded():
try:
self.load() # try to load the whole thing
if flush:
self.flush()
return True
except:
return False
else:
return True
def canload(self, frame=0):
"""Return True if the video can be previewed at frame=k successfully.
This is useful for filtering bad videos or filtering videos that cannot be loaded using your current FFMPEG version.
.. note:: This will only try to preview a single frame. This will not check if the entire video is loadable. Use `vipy.video.Video.isloadable` in this case
This will hang if calling canload on a streaming URL.
"""
if not self.isloaded():
try:
self.preview(framenum=frame) # try to preview
return True
except:
return False
else:
return True
def iscolor(self):
"""Is the video a three channel color video as returned from `vipy.video.Video.channels`?"""
return self.channels() == 3
def isstreaming(self):
return isRTSPurl(self._filename) or isRTMPurl(self._filename)
def isgrayscale(self):
"""Is the video a single channel as returned from `vipy.video.Video.channels`?"""
return self.channels() == 1
def hasfilename(self):
"""Does the filename returned from `vipy.video.Video.filename` exist?"""
return self._filename is not None and (os.path.exists(self._filename) or isRTSPurl(self._filename) or isRTMPurl(self._filename))
def isdownloaded(self):
"""Does the filename returned from `vipy.video.Video.filename` exist, meaning that the url has been downloaded to a local file?"""
return self._filename is not None and os.path.exists(self._filename)
def hasurl(self):
"""Is the url returned from `vipy.video.Video.url` a well formed url?"""
return self._url is not None and isurl(self._url)
def islive(self):
return self.hasurl() and (isRTSPurl(self._url) or isRTMPurl(self._url))
def array(self, array=None, copy=False):
"""Set or return the video buffer as a numpy array.
Args:
array: [np.array] A numpy array of size NxHxWxC = (frames, height, width, channels) of type uint8 or float32.
copy: [bool] If true, copy the buffer by value instead of by reference. Copied buffers do not share pixels.
Returns:
if array=None, return a reference to the pixel buffer as a numpy array, otherwise return the video object.
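A short sketch of get and set (assuming `v` is loadable):
```python
a = v.load().array()                              # reference to the NxHxWxC frame buffer
v2 = vipy.video.Video(array=a, colorspace='rgb')  # a new video sharing the same pixel buffer
```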
"""
if array is None:
return self._array
elif isnumpy(array):
assert array.dtype == np.float32 or array.dtype == np.uint8, "Invalid input - array() must be type uint8 or float32"
assert array.ndim == 4, "Invalid input - array() must be of shape NxHxWxC, for N frames of size HxW with C channels"
self._array = np.copy(array) if copy else array
if copy:
self._array.setflags(write=True) # mutable iterators, triggers copy
self.colorspace(None) # must be set with colorspace() after array() before _convert()
return self
else:
raise ValueError('Invalid input - array() must be numpy array')
def fromarray(self, array):
"""Alias for self.array(..., copy=True), which forces the new array to be a copy"""
return self.array(array, copy=True)
def fromdirectory(self, indir, sortkey=None):
"""Create a video from a directory of frames stored as individual image filenames.
Given a directory with files:
framedir/image_0001.jpg
framedir/image_0002.jpg
```python
vipy.video.Video().fromdirectory('/path/to/framedir')
```
"""
return self.fromframes([vipy.image.Image(filename=f) for f in sorted(vipy.util.imlist(indir), key=sortkey)])
def fromframes(self, framelist, copy=True):
"""Create a video from a list of frames"""
assert all([isinstance(im, vipy.image.Image) for im in framelist]), "Invalid input"
return self.array(np.stack([im.load().array() if im.load().array().ndim == 3 else np.expand_dims(im.load().array(), 2) for im in framelist]), copy=copy).colorspace(framelist[0].colorspace())
def tonumpy(self):
"""Alias for numpy()"""
return self.numpy()
def mutable(self):
"""Return a video object with a writeable mutable frame array. Video must be loaded, triggers copy of underlying numpy array if the buffer is not writeable.
Returns:
This object with a mutable frame buffer in self.array() or self.numpy()
"""
assert self.isloaded()
self._array = np.copy(self._array) if not self._array.flags['WRITEABLE'] else self._array # triggers copy
self._array.setflags(write=True) # mutable iterators, torch conversion
return self
def numpy(self):
"""Convert the video to a writeable numpy array, triggers a load() and copy() as needed. Returns the numpy array."""
self.load()
self._array = np.copy(self._array) if not self._array.flags['WRITEABLE'] else self._array # triggers copy
self._array.setflags(write=True) # mutable iterators, torch conversion
return self._array
def zeros(self):
"""Set all pixels in the loaded video to zero, triggers load()"""
self._array = 0*self.load()._array
return self
def reload(self):
"""Return a flushed clone of this video with the pixels reloaded from the filter chain"""
return self.clone(flush=True).load()
def nofilename(self):
self._filename = None
self._update_ffmpeg('filename', None)
return self
def filename(self, newfile=None, copy=False, symlink=False):
"""Update video Filename with optional copy or symlink from existing file (self.filename()) to new file"""
if newfile is None:
return self._filename
if not isurl(newfile):
# Copy or symlink from the old filename to the new filename (if requested)
newfile = os.path.normpath(os.path.expanduser(newfile))
if copy:
assert self.hasfilename(), "File not found for copy"
remkdir(filepath(newfile))
shutil.copyfile(self._filename, newfile)
elif symlink:
assert self.hasfilename(), "File not found for symlink"
remkdir(filepath(newfile))
if os.path.islink(newfile) and os.path.abspath(os.readlink(newfile)) == os.path.normpath(os.path.abspath(os.path.expanduser(self.filename()))):
pass # already points to the same place, nothing to do
else:
os.symlink(self._filename, newfile)
# Update ffmpeg filter chain with new input node filename (this file may not exist yet)
self._update_ffmpeg('filename', newfile)
self._filename = newfile
return self
def abspath(self):
"""Change the path of the filename from a relative path to an absolute path (not relocatable)"""
return self.filename(os.path.normpath(os.path.abspath(os.path.expanduser(self.filename()))))
def relpath(self, parent=None, start=None):
"""Replace the filename with a relative path to parent (or current working directory if none).
Usage:
```python
v = vipy.video.Video(filename='/path/to/dataset/video/category/out.mp4')
v.relpath(parent='/path/to/dataset')
v.filename() == 'video/category/out.mp4'
```
If the current working directory is /path/to/dataset, then the relative filename will resolve and v.load() will succeed.
Args:
parent [str]: A parent path of the current filename to remove and be relative to. If the filename is '/path/to/video.mp4' then the filename must start with parent, and parent will be removed from the filename.
start [str]: Return a relative filename starting from path start='/path/to/dir' that will create a relative path to this filename. If start='/a/b/c' and filename='/a/b/d/e/f.ext' then return filename '../d/e/f.ext'
Returns:
This video object with the filename changed to be a relative path
"""
assert parent is not None or start is not None
if parent is not None:
parent = parent if parent is not None else os.getcwd()
assert parent in os.path.expanduser(self.filename()), "Parent path '%s' not found in abspath '%s'" % (parent, self.filename())
self.filename(PurePath(os.path.expanduser(self.filename())).relative_to(parent))
if start is not None:
self.filename(os.path.join(os.path.relpath(os.path.expanduser(filepath(self.filename())), start), filetail(self.filename())))
return self
def rename(self, newname):
"""Move the underlying video file preserving the absolute path, such that self.filename() == '/a/b/c.ext' and newname='d.ext', then self.filename() -> '/a/b/d.ext', and move the corresponding file"""
newfile = os.path.join(filepath(self.filename()), newname)
shutil.move(self.filename(), newfile)
return self.filename(newfile)
def filesize(self):
"""Return the size in bytes of the filename(), None if the filename() is invalid"""
return os.path.getsize(self.filename()) if self.hasfilename() else None
def downloadif(self, ignoreErrors=False, timeout=10, verbose=True, max_filesize='350m'):
"""Download URL to filename if the filename has not already been downloaded"""
return self.download(ignoreErrors=ignoreErrors, timeout=timeout, verbose=verbose, max_filesize=max_filesize) if self.hasurl() and not self.isdownloaded() else self
def download(self, ignoreErrors=False, timeout=10, verbose=True, max_filesize='350m'):
"""Download URL to filename provided by constructor, or to temp filename.
Args:
ignoreErrors: [bool] If true, show a warning and return the video object, otherwise throw an exception
timeout: [int] An integer timeout in seconds for the download to connect
verbose: [bool] If true, show more verbose console output
max_filesize: [str] A string of the form 'NNNm' or 'NNNg' to limit the maximum download size for youtube URLs (e.g. '350m' for 350MB, '12g' for 12GB).
Returns:
This video object with the video downloaded to the filename()
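For example (a minimal sketch; the URL is hypothetical):
```python
v = vipy.video.Video(url='https://example.com/video.mp4')
v.download(verbose=True)   # fetch the URL to a local filename
assert v.isdownloaded()
```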
"""
if self._url is None and self._filename is not None:
return self
if self._url is None:
raise ValueError('[vipy.video.download]: No URL to download')
elif not isurl(str(self._url)):
raise ValueError('[vipy.video.download]: Invalid URL "%s" ' % self._url)
try:
url_scheme = urllib.parse.urlparse(self._url)[0]
if isyoutubeurl(self._url):
f = self._filename if filefull(self._filename) is None else filefull(self._filename)
vipy.videosearch.download(self._url, f, writeurlfile=False, skip=ignoreErrors, verbose=verbose, max_filesize=max_filesize)
for ext in ['mkv', 'mp4', 'webm']:
f = '%s.%s' % (self.filename(), ext)
if os.path.exists(f):
self.filename(f) # change the filename to match the youtube extension
break
if not self.hasfilename():
raise ValueError('Downloaded file not found "%s.*"' % self.filename())
elif url_scheme in ['http', 'https'] and (isvideourl(self._url) or iswebp(self._url)):
vipy.downloader.download(self._url,
self._filename,
verbose=verbose,
timeout=timeout,
sha1=None,
username=None,
password=None)
elif url_scheme == 'file':
shutil.copyfile(self._url, self._filename)
elif url_scheme == 's3':
if self.filename() is None:
self.filename(totempdir(self._url))
if vipy.globals.cache() is not None:
self.filename(os.path.join(remkdir(vipy.globals.cache()), filetail(self._url)))
vipy.downloader.s3(self.url(), self.filename(), verbose=verbose)
elif url_scheme == 'scp':
if self.filename() is None:
self.filename(templike(self._url))
if vipy.globals.cache() is not None:
self.filename(os.path.join(remkdir(vipy.globals.cache()), filetail(self._url)))
vipy.downloader.scp(self._url, self.filename(), verbose=verbose)
elif not isvideourl(self._url) and vipy.videosearch.is_downloadable_url(self._url):
vipy.videosearch.download(self._url, filefull(self._filename), writeurlfile=False, skip=ignoreErrors, verbose=verbose, max_filesize=max_filesize)
for ext in ['mkv', 'mp4', 'webm']:
f = '%s.%s' % (self.filename(), ext)
if os.path.exists(f):
self.filename(f)
break
if not self.hasfilename():
raise ValueError('Downloaded file not found "%s.*"' % self.filename())
elif url_scheme == 'rtsp':
# https://ffmpeg.org/ffmpeg-protocols.html#rtsp
pass
else:
raise NotImplementedError(
'Invalid URL scheme "%s" for URL "%s"' %
(url_scheme, self._url))
except (httplib.BadStatusLine,
urllib.error.URLError,
urllib.error.HTTPError):
if ignoreErrors:
warnings.warn('[vipy.video][WARNING]: download failed - Ignoring Video')
self._array = None
else:
raise
except IOError:
if ignoreErrors:
warnings.warn('[vipy.video][WARNING]: IO error - Invalid video file, url or invalid write permissions "%s" - Ignoring video' % self.filename())
self._array = None
else:
raise
except KeyboardInterrupt:
raise
except Exception:
if ignoreErrors:
warnings.warn('[vipy.video][WARNING]: load error for video "%s"' % self.filename())
else:
raise
return self
def fetch(self, ignoreErrors=False):
"""Download only if hasfilename() is not found"""
return self.download(ignoreErrors=ignoreErrors) if not self.hasfilename() else self
def shape(self, shape=None, probe=False):
"""Return (height, width) of the frames, requires loading a preview frame from the video if the video is not already loaded, or providing the shape=(height,width) by the user"""
if probe:
# Set the shape of the video from the filename by ffprobe, this should be deprecated
return self.shape(self.probeshape(), probe=False)
elif shape is not None:
# Set the shape of the video using the shape provided by the user (e.g. sometimes the user knows what this will be)
assert isinstance(shape, tuple), "shape=(height, width) tuple"
self._shape = shape
self._channels = self.channels()
#self._previewhash = hashlib.md5(str(self._ffmpeg_commandline()).encode()).hexdigest()
return self
elif not self.isloaded():
# Preview a frame from the ffmpeg filter chain (more expensive)
if self._shape is None or len(self._shape) == 0: # dirty filter chain
im = self.preview() # ffmpeg chain changed, load a single frame of video, triggers fetch
self._shape = (im.height(), im.width()) # cache the shape
self._channels = im.channels()
#self._previewhash = previewhash
return self._shape
else:
# Frames already loaded - get shape from numpy array
return (self._array.shape[1], self._array.shape[2])
def channelshape(self):
"""Return a tuple (channels, height, width) for the video"""
return (self.channels(), self.height(), self.width())
def issquare(self):
"""Return true if the video has square dimensions (height == width), else false"""
s = self.shape()
return s[0] == s[1]
def channels(self):
"""Return integer number of color channels"""
if not self.isloaded():
self._channels = 3 # always color video
#previewhash = hashlib.md5(str(self._ffmpeg_commandline()).encode()).hexdigest()
#if not hasattr(self, '_previewhash') or previewhash != self._previewhash:
# im = self.preview() # ffmpeg chain changed, load a single frame of video
# self._shape = (im.height(), im.width()) # cache the shape
# self._channels = im.channels() # cache
# self._previewhash = previewhash
return self._channels # cached
else:
return 1 if self.load().array().ndim == 3 else self.load().array().shape[3]
def width(self):
"""Width (cols) in pixels of the video for the current filter chain"""
return self.shape()[1]
def height(self):
"""Height (rows) in pixels of the video for the current filter chain"""
return self.shape()[0]
def aspect_ratio(self):
"""The width/height of the video expressed as a fraction"""
return self.width() / self.height()
def preview(self, framenum=0):
"""Return selected frame of filtered video, return vipy.image.Image object. This is useful for previewing the frame shape of a complex filter chain or the frame contents at a particular location without loading the whole video"""
if self.isloaded():
return self[framenum]
elif self.hasurl() and not self.hasfilename():
self.download(verbose=True)
if not self.hasfilename():
raise ValueError('Video file not found')
if iswebp(self.filename()) or isgif(self.filename()):
return self.load().frame(framenum)
# Convert frame to mjpeg and pipe to stdout, used to get dimensions of video
# - The MJPEG encoder will generally output lower quality than H.264 encoded frames
# - This means that frame indexing from preview() will generate slightly different images than streaming raw
# - Beware running convnets, as the pixels will be slightly different (~4 grey levels in uint8) ...
try:
# FFMPEG frame indexing is inefficient for large framenum. Need to add "-ss sec.msec" flag before input
# - the "ss" option must be provided before the input filename, and is supported by ffmpeg-python as ".input(in_filename, ss=time)"
# - Seek to the frame before the desired frame in order to pipe the next (desired) frame
timestamp_in_seconds = max(0.0, (framenum-1)/float(self.framerate()))
f_prepipe = self.clone(shallow=True)._update_ffmpeg_seek(offset=timestamp_in_seconds)._ffmpeg.filter('select', 'gte(n,{})'.format(0))
f = f_prepipe.output('pipe:', vframes=1, format='image2', vcodec='mjpeg')\
.global_args('-cpuflags', '0', '-loglevel', 'debug' if vipy.globals.isdebug() else 'error')
(out, err) = f.run(capture_stdout=True, capture_stderr=True)
except Exception as e:
raise ValueError('[vipy.video.load]: Video preview failed with error "%s"\n - Video: "%s"\n - FFMPEG command: \'sh> %s\'\n - Try manually running this ffmpeg command to see errors. This error usually means that the video is corrupted.' % (str(e), str(self), str(self._ffmpeg_commandline(f_prepipe.output('preview.jpg', vframes=1)))))
# [EXCEPTION]: UnidentifiedImageError: cannot identify image file, means usually that FFMPEG piped a zero length image
try:
return Image(array=np.array(PIL.Image.open(BytesIO(out))))
except Exception as e:
print('[vipy.video.Video.preview][ERROR]: %s' % str(e))
print(' - FFMPEG attempted to extract a single frame from the following video and failed:\n %s' % str(self))
print(' - This may occur after calling clip() with too short a duration, try increasing the clip to be > 1 sec')
print(' - This may occur after calling clip() with a startframe or endframe outside the duration of the video')
print(' - This may occur if requesting a frame number greater than the length of the video. At this point, we do not know the video length, and cannot fail gracefully')
print(' - This may occur when the framerate of the video from ffprobe (tbr) does not match that passed to fps filter, resulting in a zero length image preview piped to stdout')
print(' - This may occur if the filter chain fails for some unknown reason on this video. Try running this ffmpeg command manually and inspect the FFMPEG console output:\n sh> %s' % str(self._ffmpeg_commandline(f_prepipe.output('preview.jpg', vframes=1))))
raise
def thumbnail(self, outfile=None, frame=0):
"""Return annotated frame=k of video, save annotation visualization to provided outfile.
This is functionally equivalent to `vipy.video.Video.frame` with an additional outfile argument to easily save an annotated thumbnail image.
Args:
outfile: [str] an optional outfile to save the annotated frame
frame: [int >= 0] The frame to output the thumbnail
Returns:
A `vipy.image.Image` object for frame k.
"""
im = self.frame(frame, img=self.preview(frame).array())
return im.savefig(outfile) if outfile is not None else im
def load(self, verbose=False, ignoreErrors=False, shape=None):
"""Load a video using ffmpeg, applying the requested filter chain.
Args:
verbose: [bool] if True, then ffmpeg console output will be displayed.
ignoreErrors: [bool] if True, then all load errors are warned and skipped. Be sure to call isloaded() to confirm loading was successful.
shape: [tuple (height, width, channels)] If provided, use this shape for reading and reshaping the byte stream from ffmpeg. This is useful for efficient loading in some scenarios. Knowing the final output shape can speed up loads by avoiding a preview() of the filter chain to get the frame size
Returns:
this video object, with the pixels loaded in self.array()
.. warning:: Loading long videos can result in out of memory conditions. Try to call clip() first to extract a video segment to load().
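For example (a minimal sketch; the path is hypothetical):
```python
v = vipy.video.Video(filename='/path/to/video.mp4')
v.clip(0, 100).load()   # apply the filter chain and load only the first 100 frames
assert v.isloaded()
```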
"""
if self.isloaded():
return self
elif not self.hasfilename() and self.hasurl():
self.download(ignoreErrors=ignoreErrors)
elif not self.hasfilename() and not ignoreErrors:
raise ValueError('Invalid input - load() requires a valid URL, filename or array')
if not self.hasfilename() and ignoreErrors:
print('[vipy.video.load]: Video file "%s" not found - Ignoring' % self.filename())
return self
if iswebp(self.filename()) or isgif(self.filename()):
frames = []
pil = PIL.Image.open(self.filename())
self._framerate = (1000.0 / pil.info['duration']) if 'duration' in pil.info else self._framerate
for k in range(pil.n_frames):
pil.seek(k)
frames.append(np.array(pil.convert('RGB')))
return self.array(np.stack(frames)).colorspace('RGB')
# Load the video with ffmpeg
#
# [EXCEPTION]: older ffmpeg versions may segfault on complex crop filter chains
# -On some versions of ffmpeg setting -cpuflags=0 fixes it, but the right solution is to rebuild from the head (30APR20)
if verbose:
print('[vipy.video.load]: Loading "%s"' % self.filename())
try:
f_prepipe = copy.deepcopy(self._ffmpeg)
f = self._ffmpeg.output('pipe:', format='rawvideo', pix_fmt='rgb24')\
.global_args('-cpuflags', '0', '-loglevel', 'debug' if vipy.globals.isdebug() else 'quiet')
(out, err) = f.run(capture_stdout=True, capture_stderr=True)
except Exception as e:
if not ignoreErrors:
raise ValueError('[vipy.video.load]: Load failed with error "%s"\n\n - Video: "%s"\n - FFMPEG command: \'sh> %s\'\n - This error usually means that the video is corrupted or that you need to upgrade your FFMPEG distribution to the latest stable version.\n - Try running the output of the ffmpeg command for debugging.' % (str(e), str(self), str(self._ffmpeg_commandline(f_prepipe.output('preview.mp4')))))
else:
return self # Failed, return immediately, useful for calling canload()
# Video shape:
# - due to complex filter chains, we may not know the final video size without executing it
# - However, this introduces extra cost by calling preview() on each filter chain
# - If we know what the shape will be (e.g. we made the video square with a known size), then use it here directly
(height, width, channels) = (self.height(), self.width(), self.channels()) if shape is None else shape
# [EXCEPTION]: older ffmpeg versions may be off by one on the size returned from self.preview() which uses an image decoder vs. f.run() which uses a video decoder
# -Try to correct this manually by searching for a off-by-one-pixel decoding that works. The right way is to upgrade your FFMPEG version to the FFMPEG head (11JUN20)
# -We cannot tell which is the one that the end-user wanted, so we leave it up to the calling function to check dimensions (see self.resize())
if (len(out) % (height*width*channels)) != 0:
#warnings.warn('Your FFMPEG version is triggering a known bug that is being worked around in an inefficient manner. Consider upgrading your FFMPEG distribution.')
if (len(out) % ((height-1)*(width-1)*channels) == 0):
(newwidth, newheight) = (width-1, height-1)
elif (len(out) % ((height-1)*(width)*channels) == 0):
(newwidth, newheight) = (width, height-1)
elif (len(out) % ((height-1)*(width+1)*channels) == 0):
(newwidth, newheight) = (width+1, height-1)
elif (len(out) % ((height)*(width-1)*channels) == 0):
(newwidth, newheight) = (width-1, height)
elif (len(out) % ((height)*(width+1)*channels) == 0):
(newwidth, newheight) = (width+1, height)
elif (len(out) % ((height+1)*(width-1)*channels) == 0):
(newwidth, newheight) = (width-1, height+1)
elif (len(out) % ((height+1)*(width)*channels) == 0):
(newwidth, newheight) = (width, height+1)
elif (len(out) % ((height+1)*(width+1)*channels) == 0):
(newwidth, newheight) = (width+1, height+1)
else:
(newwidth, newheight) = (width, height)
is_loadable = (len(out) % (newheight*newwidth*channels)) == 0
if not is_loadable:
im = self.preview() # get the real shape...
(newheight, newwidth, newchannels) = (im.height(), im.width(), im.channels())
assert is_loadable or ignoreErrors, "Load failed for video '%s', and FFMPEG command line: '%s'" % (str(self), str(self._ffmpeg_commandline(f)))
self._array = np.frombuffer(out, np.uint8).reshape([-1, newheight, newwidth, channels]) if is_loadable else None # read-only
self.colorspace('rgb' if channels == 3 else 'lum')
self.resize(rows=height, cols=width) # Very expensive framewise resizing so that the loaded video is identical shape to preview
else:
self._array = np.frombuffer(out, np.uint8).reshape([-1, height, width, channels]) # read-only
self.colorspace('rgb' if channels == 3 else 'lum')
return self
def speed(self, s):
"""Change the speed by a multiplier s. If s=1, this will be the same speed, s=0.5 for half-speed (slower playback), s=2 for double-speed (faster playback)"""
assert s > 0, "Invalid input"
self._ffmpeg = self._ffmpeg.setpts('%1.3f*PTS' % float(1.0/float(s)))
return self
def clip(self, startframe, endframe=None):
"""Load a video clip betweeen start and end frames"""
assert (endframe is None or startframe <= endframe) and startframe >= 0, "Invalid start and end frames (%s, %s)" % (str(startframe), str(endframe))
if not self.isloaded():
timestamp_in_seconds = ((self._startframe if self._startframe is not None else 0)+startframe)/float(self.framerate())
self._update_ffmpeg_seek(timestamp_in_seconds)
if endframe is not None:
self._ffmpeg = self._ffmpeg.setpts('PTS-STARTPTS') # reset timestamp to 0 before trim filter
self._ffmpeg = self._ffmpeg.trim(start=0, end=(endframe-startframe)/self.framerate()) # must be in seconds to allow for framerate conversion
self._ffmpeg = self._ffmpeg.setpts('PTS-STARTPTS') # reset timestamp to 0 after trim filter
self._startframe = startframe if self._startframe is None else self._startframe + startframe # for __repr__ only
self._endframe = (self._startframe + (endframe-startframe)) if endframe is not None else endframe # for __repr__ only
else:
endframe = endframe if endframe is not None else len(self._array)
self._array = self._array[startframe:endframe]
(self._startframe, self._endframe) = (0, endframe-startframe)
return self
def cliprange(self):
"""Return the planned clip (startframe, endframe) range.
This is useful for introspection of the planned clip() before load(), such as for data augmentation purposes without triggering a load.
Returns:
(startframe, endframe) of the video() such that after load(), the pixel buffer will contain frame=0 equivalent to startframe in the source video, and frame=endframe-startframe-1 equivalent to endframe in the source video.
(0, None) If the video does not have a clip() (e.g. clip() was never called, or the filter chain does not include a 'trim')
.. notes:: The endframe can be retrieved (inefficiently) using:
```python
int(round(self.duration_in_frames_of_videofile() * (self.framerate() / self.framerate_of_videofile())))
```
"""
return (self._startframe if self._startframe is not None else 0, self._endframe)
#def cliptime(self, startsec, endsec):
# """Load a video clip betweeen start seconds and end seconds, should be initialized by constructor, which will work but will not set __repr__ correctly"""
# assert startsec <= endsec and startsec >= 0, "Invalid start and end seconds (%s, %s)" % (str(startsec), str(endsec))
# assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
# self._ffmpeg = self._ffmpeg.trim(start=startsec, end=endsec)\
# .setpts('PTS-STARTPTS') # reset timestamp to 0 after trim filter
# self._startsec = startsec if self._startsec is None else self._startsec + startsec # for __repr__ only
# self._endsec = endsec if self._endsec is None else self._startsec + (endsec-startsec) # for __repr__ only
# return self
def rot90cw(self):
"""Rotate the video 90 degrees clockwise, can only be applied prior to load()"""
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
self.shape(shape=(self.width(), self.height())) # transposed
self._ffmpeg = self._ffmpeg.filter('transpose', 1)
return self
def rot90ccw(self):
"""Rotate the video 90 degrees counter-clockwise, can only be applied prior to load()"""
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
self.shape(shape=(self.width(), self.height())) # transposed
self._ffmpeg = self._ffmpeg.filter('transpose', 2)
return self
def fliplr(self):
"""Mirror the video left/right by flipping horizontally"""
if not self.isloaded():
self._ffmpeg = self._ffmpeg.filter('hflip')
else:
self.array(np.stack([np.fliplr(f) for f in self._array]), copy=False)
return self
def flipud(self):
"""Rotate the video 90 degrees counter-clockwise, can only be applied prior to load()"""
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
self._ffmpeg = self._ffmpeg.filter('vflip')
return self
def rescale(self, s):
"""Rescale the video by factor s, such that the new dimensions are (s*H, s*W), can only be applied prior to load()"""
if s == 1:
return self
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
self.shape(shape=(int(np.round(self.height()*float(np.ceil(s*1e6)/1e6))), int(np.round(self.width()*float(np.ceil(s*1e6)/1e6))))) # update the known shape
self._ffmpeg = self._ffmpeg.filter('scale', 'iw*%1.6f' % float(np.ceil(s*1e6)/1e6), 'ih*%1.6f' % float(np.ceil(s*1e6)/1e6)) # ceil last significant digit to avoid off by one
return self
def resize(self, rows=None, cols=None, width=None, height=None):
"""Resize the video to be (rows=height, cols=width)"""
assert not (rows is not None and height is not None)
assert not (cols is not None and width is not None)
rows = rows if rows is not None else height
cols = cols if cols is not None else width
newshape = (rows if rows is not None else int(np.round(self.height()*(cols/self.width()))),
cols if cols is not None else int(np.round(self.width()*(rows/self.height()))))
if (rows is None and cols is None):
return self # only if strictly necessary
if not self.isloaded():
# Apply the scale filter:
# - Note that multiple calls to resize will process through the filter chain one by one in the order of resizing calls
# - This can introduce resizing artifacts if the resize is a downsample followed by an upsample
# - One common use case is to downsample a video for tracking, then apply the tracks to the original video
# - One approach: downsample the video, track at low resolution, upsample tracks back to the original resolution, then flush the filter chain to load the video at original resolution with no rescaling
# >>> vc = tracker(v.clone().mindim(128)).mindim(v.mindim()).clone(flushfilter=True)
self._ffmpeg = self._ffmpeg.filter('scale', cols if cols is not None else -1, rows if rows is not None else -1)
else:
# Do not use self.__iter__() which triggers copy for mutable arrays
#self.array(np.stack([Image(array=self._array[k]).resize(rows=rows, cols=cols).array() for k in range(len(self))]), copy=False)
# Faster: RGB->RGBX to allow for PIL.Image.fromarray() without tobytes() copy, padding faster than np.concatenate()
#self.array(np.stack([PIL.Image.fromarray(x, mode='RGBX').resize( (cols, rows), resample=PIL.Image.BILINEAR) for x in np.pad(self._array, ((0,0),(0,0),(0,0),(0,1)))])[:,:,:,:-1], copy=False) # RGB->RGBX->RGB
# Fastest: padding introduces more overhead than just accepting tobytes(), image size dependent?
self.array(np.stack([PIL.Image.fromarray(x).resize( (newshape[1], newshape[0]), resample=PIL.Image.BILINEAR) for x in np.ascontiguousarray(self._array)]), copy=False)
self.shape(shape=newshape) # manually set newshape
return self
def mindim(self, dim=None):
"""Resize the video so that the minimum of (width,height)=dim, preserving aspect ratio"""
(H,W) = self.shape() # yuck, need to get image dimensions before filter
return min(self.shape()) if dim is None else (self if min(H,W) == dim else (self.resize(cols=dim) if W<H else self.resize(rows=dim)))
def maxdim(self, dim=None):
"""Resize the video so that the maximum of (width,height)=dim, preserving aspect ratio"""
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
(H,W) = self.shape() # yuck, need to get image dimensions before filter
return max(H,W) if dim is None else (self.resize(cols=dim) if W>H else self.resize(rows=dim))
def randomcrop(self, shape, withbox=False):
"""Crop the video to shape=(H,W) with random position such that the crop contains only valid pixels, and optionally return the box"""
assert shape[0] <= self.height() and shape[1] <= self.width() # triggers preview()
(ymin, xmin) = (np.random.randint(self.height()-shape[0]), np.random.randint(self.width()-shape[1]))  # random upper left corner within valid pixels
bb = vipy.geometry.BoundingBox(xmin=int(xmin), ymin=int(ymin), width=int(shape[1]), height=int(shape[0])) # may be outside frame
self.crop(bb, zeropad=True)
return self if not withbox else (self, bb)
def centercrop(self, shape, withbox=False):
"""Crop the video to shape=(H,W) preserving the integer centroid position, and optionally return the box"""
assert shape[0] <= self.height() and shape[1] <= self.width() # triggers preview()
bb = vipy.geometry.BoundingBox(xcentroid=float(self.width()/2.0), ycentroid=float(self.height()/2.0), width=float(shape[1]), height=float(shape[0])).int() # may be outside frame
self.crop(bb, zeropad=True)
return self if not withbox else (self, bb)
def centersquare(self):
"""Crop video of size (NxN) in the center, such that N=min(width,height), keeping the video centroid constant"""
return self.centercrop( (min(self.height(), self.width()), min(self.height(), self.width())))
def cropeven(self):
"""Crop the video to the largest even (width,height) less than or equal to current (width,height). This is useful for some codecs or filters which require even shape."""
return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(vipy.math.even(self.width())), height=int(vipy.math.even(self.height()))))
def maxsquare(self):
"""Pad the video to be square, preserving the upper left corner of the video"""
# This ffmpeg filter can throw the error: "Padded dimensions cannot be smaller than input dimensions." since the preview is off by one. Add one here to make sure.
# FIXME: not sure where in some filter chains this off-by-one error is being introduced, but probably does not matter since it does not affect any annotations
# and since the max square always preserves the scale and the upper left corner of the source video.
# FIXME: this may trigger an inefficient resizing operation during load()
if not self.issquare():
d = max(self.shape())
self._ffmpeg = self._ffmpeg.filter('pad', d+1, d+1, 0, 0)
self.shape(shape=(d+1, d+1))
return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(d), height=int(d)))
else:
return self
def minsquare(self):
"""Return a square crop of the video, preserving the upper left corner of the video"""
d = min(self.shape())
self.shape(shape=(d, d))
return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(d), height=int(d)))
def maxmatte(self):
"""Return a square video with dimensions (self.maxdim(), self.maxdim()) with zeropadded lack bars or mattes above or below the video forming a letterboxed video."""
return self.zeropad(max(1, int((max(self.shape()) - self.width())/2)), max(int((max(self.shape()) - self.height())/2), 1)).maxsquare()
def zeropad(self, padwidth, padheight):
"""Zero pad the video with padwidth columns before and after, and padheight rows before and after
.. notes:: Older FFMPEG implementations can throw the error "Input area #:#:#:# not within the padded area #:#:#:# or zero-sized", which is often caused by odd sized padding.
Recommend calling self.cropeven().zeropad(...) to avoid this.
"""
assert isinstance(padwidth, int) and isinstance(padheight, int)
if not self.isloaded():
self.shape(shape=(self.height()+2*padheight, self.width()+2*padwidth)) # manually set shape to avoid preview
self._ffmpeg = self._ffmpeg.filter('pad', 'iw+%d' % (2*padwidth), 'ih+%d' % (2*padheight), '%d'%padwidth, '%d'%padheight)
elif padwidth > 0 or padheight > 0:
self.array( np.pad(self.array(), ((0,0), (padheight,padheight), (padwidth,padwidth), (0,0)), mode='constant'), copy=False) # this is very expensive, since np.pad() must copy (once in np.pad >=1.17)
return self
def pad(self, padwidth=0, padheight=0):
"""Alias for zeropad"""
return self.zeropad(padwidth=padwidth, padheight=padheight)
def zeropadlike(self, width, height):
"""Zero pad the video balancing the border so that the resulting video size is (width, height)."""
assert width >= self.width() and height >= self.height(), "Invalid input - final (width=%d, height=%d) must be greater than current image size (width=%d, height=%d)" % (width, height, self.width(), self.height())
assert int(np.floor((width - self.width())/2)) == int(np.ceil((width - self.width())/2)), "Zero pad must be symmetric, this is often due to odd zeropadding which ffmpeg doesn't like. Try changing the width +/- 1 pixel"
assert int(np.floor((height - self.height())/2)) == int(np.ceil((height - self.height())/2)), "Zero pad must be symmetric, this is often due to odd zeropadding which ffmpeg doesn't like. Try changing the height +/- 1 pixel"
return self.zeropad(int(np.floor((width - self.width())/2)),
int(np.floor((height - self.height())/2)))
def crop(self, bbi, zeropad=True):
"""Spatially crop the video using the supplied vipy.geometry.BoundingBox, can only be applied prior to load().
.. note:: Crop is performed in place overwriting pixels of self.array(). Clone() before crop() if array() must be preserved.
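For example (a minimal sketch):
```python
bb = vipy.geometry.BoundingBox(xmin=0, ymin=0, width=128, height=128)
v = v.crop(bb, zeropad=True)  # 128x128 crop anchored at the upper left corner
```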
"""
assert isinstance(bbi, vipy.geometry.BoundingBox), "Invalid input"
bbc = bbi.clone().imclipshape(self.width(), self.height()).int() # clipped box to image rectangle
bb = bbi.int() if zeropad else bbc # use clipped box if not zeropad
if bb.isdegenerate():
return None
elif not self.isloaded():
if zeropad and bb != bbc:
# Crop outside the image rectangle will segfault ffmpeg, pad video first (if zeropad=False, then rangecheck will not occur!)
self.zeropad(int(np.ceil(bb.width()-bbc.width())), int(np.ceil(bb.height()-bbc.height()))) # cannot be called in derived classes
bb = bb.offset(int(np.ceil(bb.width()-bbc.width())), int(np.ceil(bb.height()-bbc.height()))) # Shift boundingbox by padding (integer coordinates)
self._ffmpeg = self._ffmpeg.filter('crop', '%d' % bb.width(), '%d' % bb.height(), '%d' % bb.xmin(), '%d' % bb.ymin(), keep_aspect=0) # keep_aspect=False (disable exact=True, this is not present in older ffmpeg)
else:
self.array( bbc.crop(self.array()), copy=False ) # crop first, in-place, valid pixels only
if zeropad and bb != bbc:
((dyb, dya), (dxb, dxa)) = ((max(0, int(abs(np.ceil(bb.ymin() - bbc.ymin())))), max(0, int(abs(np.ceil(bb.ymax() - bbc.ymax()))))),
(max(0, int(abs(np.ceil(bb.xmin() - bbc.xmin())))), max(0, int(abs(np.ceil(bb.xmax() - bbc.xmax()))))))
self._array = np.pad(self.load().array(), ((0,0), (dyb, dya), (dxb, dxa), (0, 0)), mode='constant')
self.shape(shape=(bb.height(), bb.width())) # manually set shape
return self
def pkl(self, pklfile=None):
"""save the object to a pickle file and return the object, useful for intermediate saving in long fluent chains"""
pklfile = pklfile if pklfile is not None else toextension(self.filename(), '.pkl')
remkdir(filepath(pklfile))
vipy.util.save(self, pklfile)
return self
def pklif(self, b, pklfile=None):
"""Save the object to the provided pickle file only if b=True. Uuseful for conditional intermediate saving in long fluent chains"""
assert isinstance(b, bool)
return self.pkl(pklfile) if b else self
def webp(self, outfile=None, pause=3, strict=True, smallest=False, smaller=False, framerate=None):
"""Save a video to an animated WEBP file, with pause=N seconds on the last frame between loops.
Args:
strict: If true, assert that the filename must have an .webp extension
pause: Integer seconds to pause between loops of the animation
smallest: if true, create the smallest possible file but takes much longer to run
smaller: If true, create a smaller file, which takes a little longer to run
framerate [float]: The output framerate of the webp file. The default is the framerate of the video.
Returns:
The filename of the webp file for this video
.. warning:: This may be slow for very long or large videos
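For example (a minimal sketch; the output path is hypothetical):
```python
f = v.webp('/path/to/out.webp', pause=2, smaller=True)  # returns the webp filename
```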
"""
outfile = vipy.util.tempWEBP() if outfile is None else outfile
assert strict is False or iswebp(outfile)
outfile = os.path.normpath(os.path.abspath(os.path.expanduser(outfile)))
framerate = framerate if framerate is not None else self._framerate
self.load().frame(0).pil().save(outfile, loop=0, save_all=True, method=6 if smallest else 3 if smaller else 0,
append_images=[self.frame(k).pil() for k in range(1, len(self))],
duration=[int(1000.0/framerate) for k in range(0, len(self)-1)] + [pause*1000])
return outfile
def gif(self, outfile, pause=3, smallest=False, smaller=False, framerate=None):
"""Save a video to an animated GIF file, with pause=N seconds between loops.
Args:
pause: Integer seconds to pause between loops of the animation
smallest: If true, create the smallest possible file but takes much longer to run
smaller: If true, create a smaller file, which takes a little longer to run
framerate [float]: The output framerate of the GIF file. The default is the framerate of the video.
Returns:
The filename of the animated GIF of this video
.. warning:: This will be very large for big videos, consider using `vipy.video.Video.webp` instead.
"""
assert isgif(outfile)
return self.webp(outfile, pause, strict=False, smallest=smallest, smaller=smaller, framerate=framerate)
def saveas(self, outfile=None, framerate=None, vcodec='libx264', verbose=False, ignoreErrors=False, flush=False, pause=5):
"""Save video to new output video file. This function does not draw boxes, it saves pixels to a new video file.
Args:
outfile: the absolute path to the output video file. This extension can be .mp4 (for video) or [".webp",".gif"] (for animated image)
ignoreErrors: if True, then exit gracefully without throwing an exception. Useful for chaining download().saveas() on parallel dataset downloads
flush: If true, then flush the buffer for this object right after saving the new video. This is useful for transcoding in parallel
framerate: input framerate of the frames in the buffer, or the output framerate of the transcoded video. If not provided, use framerate of source video
pause: an integer in seconds to pause between loops of animated images if the outfile is webp or animated gif
Returns:
a new video object with this video filename, and a clean video filter chain
.. note::
- If self.array() is loaded, then export the contents of self._array to the video file
- If self.array() is not loaded, and there exists a valid video file, apply the filter chain directly to the input video
- If outfile==None or outfile==self.filename(), then overwrite the current filename
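For example (a minimal sketch; the paths are hypothetical):
```python
v = vipy.video.Video(filename='/path/to/in.mp4').clip(0, 300)
v = v.saveas('/path/to/out.mp4')  # transcode the clip, returns a new video object with a clean filter chain
```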
"""
outfile = tocache(tempMP4()) if outfile is None else os.path.normpath(os.path.abspath(os.path.expanduser(outfile)))
premkdir(outfile) # create output directory for this file if not exists
framerate = framerate if framerate is not None else self._framerate
assert vipy.util.isvideofile(outfile), "Invalid filename extension for video filename"
if verbose:
print('[vipy.video.saveas]: Saving video "%s" ...' % outfile)
try:
if iswebp(outfile):
return self.webp(outfile, pause)
elif isgif(outfile):
return self.gif(outfile, pause)
elif isjsonfile(outfile):
with open(outfile, 'w') as f:
f.write(self.json(encode=True))
return outfile
elif self.isloaded():
# Save numpy() from load() to video, forcing to be even shape
(n, height, width, channels) = self._array.shape
process = ffmpeg.input('pipe:', format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(width, height), r=framerate) \
.filter('pad', 'ceil(iw/2)*2', 'ceil(ih/2)*2') \
.output(filename=outfile, pix_fmt='yuv420p', vcodec=vcodec) \
.overwrite_output() \
.global_args('-cpuflags', '0', '-loglevel', 'quiet' if not vipy.globals.isdebug() else 'debug') \
.run_async(pipe_stdin=True)
for frame in self._array:
process.stdin.write(frame.astype(np.uint8).tobytes())
process.stdin.close()
process.wait()
elif (self.isdownloaded() and self._isdirty()) or isRTSPurl(self.filename()) or isRTMPurl(self.filename()):
# Transcode the video file directly, do not load() then export
# Requires saving to a tmpfile if the output filename is the same as the input filename
tmpfile = '%s.tmp%s' % (filefull(outfile), fileext(outfile)) if outfile == self.filename() else outfile
self._ffmpeg.filter('pad', 'ceil(iw/2)*2', 'ceil(ih/2)*2') \
.output(filename=tmpfile, pix_fmt='yuv420p', vcodec=vcodec, r=framerate) \
.overwrite_output() \
.global_args('-cpuflags', '0', '-loglevel', 'quiet' if not vipy.globals.isdebug() else 'debug') \
.run()
if outfile == self.filename():
if os.path.exists(self.filename()):
os.remove(self.filename())
shutil.move(tmpfile, self.filename())
elif self.hasfilename() and not self._isdirty():
shutil.copyfile(self.filename(), outfile)
elif self.hasurl() and not self.hasfilename():
raise ValueError('Input video url "%s" not downloaded, call download() first' % self.url())
elif not self.hasfilename():
raise ValueError('Input video file not found "%s"' % self.filename())
else:
raise ValueError('saveas() failed')
except Exception as e:
if ignoreErrors:
# useful for saving a large number of videos in parallel where some failed download
print('[vipy.video.saveas]: Failed with error "%s" - Returning empty video' % str(repr(e)))
else:
raise
# Return a new video, cloned from this video with the new video file, optionally flush the video we loaded before returning
return self.clone(flushforward=True, flushfilter=True, flushbackward=flush).filename(outfile).nourl()
def savetmp(self):
"""Call `vipy.video.Video.saveas` using a new temporary video file, and return the video object with this new filename"""
return self.saveas(outfile=tempMP4())
def savetemp(self):
"""Alias for `vipy.video.Video.savetmp`"""
return self.savetmp()
def tocache(self):
"""Call `vipy.video.Video.saveas` using a new temporary video file in VIPY cache, and return the video object with this new filename"""
return self.saveas(outfile=vipy.util.tocache(tempMP4()))
def ffplay(self):
"""Play the video file using ffplay"""
assert self.hasfilename() or (self.hasurl() and self.download().hasfilename()) # triggers download if needed
cmd = 'ffplay "%s"' % self.filename()
print('[vipy.video.play]: Executing "%s"' % cmd)
os.system(cmd)
return self
def play(self, verbose=False, notebook=False, ffplay=True):
"""Play the saved video filename in self.filename()
If there is no filename, try to download it. If the filter chain is dirty or the pixels are loaded, dump to temp video file first then play it. This uses 'ffplay' on the PATH if available, otherwise uses a fallback player by showing a sequence of matplotlib frames.
If the output of the ffmpeg filter chain has modified this video, then this will be saved to a temporary video file. To play the original video (independent of the filter chain of this video), use `vipy.video.Video.ffplay`.
Args:
verbose: If true, show more verbose output
notebook: If true, play in a jupyter notebook
ffplay: If true, use ffplay to display the video (if available)
Returns:
The unmodified video object
"""
if not self.isdownloaded() and self.hasurl():
self.download()
if not self.isloaded() and (iswebp(self.filename()) or isgif(self.filename())):
self.load()
if notebook:
# save to temporary video, this video is not cleaned up and may accumulate
try_import("IPython.display", "ipython"); import IPython.display
if not self.hasfilename() or self.isloaded() or self._isdirty():
v = self.saveas(tempMP4())
warnings.warn('Saving video to temporary file "%s" for notebook viewer ... ' % v.filename())
return IPython.display.Video(v.filename(), embed=True)
return IPython.display.Video(self.filename(), embed=True)
elif ffplay and has_ffplay:
if self.isloaded() or self._isdirty():
f = tempMP4()
if verbose:
warnings.warn('%s - Saving video to temporary file "%s" for ffplay ... ' % ('Video loaded into memory' if self.isloaded() else 'Dirty FFMPEG filter chain', f))
v = self.saveas(f)
cmd = 'ffplay "%s"' % v.filename()
if verbose:
print('[vipy.video.play]: Executing "%s"' % cmd)
os.system(cmd)
if verbose:
print('[vipy.video.play]: Removing temporary file "%s"' % v.filename())
os.remove(v.filename()) # cleanup
elif self.hasfilename() or (self.hasurl() and self.download().hasfilename()): # triggers download
self.ffplay()
else:
raise ValueError('Invalid video file "%s" - ffplay requires a video filename' % self.filename())
return self
else:
"""Fallback player. This can visualize videos without ffplay, but it cannot guarantee frame rates. Large videos with complex scenes will slow this down and will render at lower frame rates."""
fps = self.framerate()
assert fps > 0, "Invalid display framerate"
with Stopwatch() as sw:
for (k,im) in enumerate(self.load() if self.isloaded() else self.stream()):
time.sleep(max(0, (1.0/self.framerate())*int(np.ceil((self.framerate()/fps))) - sw.since()))
im.show(figure='video')
if vipy.globals._user_hit_escape():
break
vipy.show.close('video')
return self
def show(self, verbose=False, notebook=False, ffplay=False):
"""Alias for play"""
return self.play(verbose=verbose, notebook=notebook, ffplay=ffplay)
def quicklook(self, n=9, mindim=256, startframe=0, animate=False, dt=30, thumbnail=None):
"""Generate a montage of n uniformly spaced frames.
Montage increases rowwise for n uniformly spaced frames, starting from frame zero and ending on the last frame.
Args:
n: Number of images in the quicklook
mindim: The minimum dimension of each of the elements in the montage
animate: If true, return a video constructed by animating the quicklook into a video by showing dt consecutive frames
dt: The number of frames for animation
startframe: The initial frame index to start the n uniformly sampled frames for the quicklook
thumbnail [`vipy.image.Image`]: If provided, prepend this thumbnail as the first element in the montage. This is useful for showing a high resolution image (e.g. a face, small object) contained in the video for review.
.. note:: The first frame in the upper left is guaranteed to be the start frame of the labeled activity, but the last frame in the bottom right may not be precisely the end frame and may be off by at most len(video)/9.
"""
if not self.isloaded():
self.load()
if animate:
return Video(frames=[self.quicklook(n=n, startframe=k, animate=False, dt=dt) for k in range(0, min(dt, len(self)))], framerate=self.framerate())
framelist = [min(int(np.round(f))+startframe, len(self)-1) for f in np.linspace(0, len(self)-1, n)]
imframes = [self.frame(k).maxmatte() # letterbox or pillarbox
for (j,k) in enumerate(framelist)]
imframes = [im.savefig(figure=1).rgb() for im in imframes] # temp storage in memory
if thumbnail is not None:
assert isinstance(thumbnail, vipy.image.Image)
imframes = [thumbnail.maxmatte().mindim(mindim)] + imframes # prepend
return vipy.visualize.montage(imframes, imgwidth=mindim, imgheight=mindim)
def torch(self, startframe=0, endframe=None, length=None, stride=1, take=None, boundary='repeat', order='nchw', verbose=False, withslice=False, scale=1.0, withlabel=False, nonelabel=False):
"""Convert the loaded video of shape NxHxWxC frames to an MxCxHxW torch tensor/
Args:
startframe: [int >= 0] The start frame of the loaded video to use for constructing the torch tensor
endframe: [int >= 0] The end frame of the loaded video to use for constructing the torch tensor
length: [int >= 0] The length of the torch tensor if endframe is not provided.
stride: [int >= 1] The temporal stride in frames. This is the number of frames to skip.
take: [int >= 0] The number of uniformly spaced frames to include in the tensor.
boundary: ['repeat', 'cyclic'] The boundary handling for when the requested tensor slice goes beyond the end of the video
order: ['nchw', 'nhwc', 'chwn', 'cnhw'] The axis ordering of the returned torch tensor N=number of frames (batchsize), C=channels, H=height, W=width
verbose: [bool] Print out the slice used for constructing the tensor
withslice: [bool] Return a tuple (tensor, slice) that includes the slice used to construct the tensor. Useful for data provenance.
scale: [float] An optional scale factor to apply to the tensor. Useful for converting [0,255] -> [0,1]
withlabel: [bool] Return a tuple (tensor, labels) that includes the N framewise activity labels.
nonelabel: [bool] returns tuple (t, None) if withlabel=False
Returns:
Returns a torch float tensor, analogous to torchvision.transforms.ToTensor()
Returns (tensor, slice) if withslice=True (withslice takes precedence)
Returns (tensor, labellist) if withlabel=True
.. notes::
- This triggers a load() of the video
- The precedence of arguments is (startframe, endframe) or (startframe, startframe+length), then stride and take.
- Follows numpy slicing rules. Optionally return the slice used if withslice=True
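For example (a minimal sketch; the path is hypothetical):
```python
t = vipy.video.Video(filename='/path/to/video.mp4').torch(startframe=0, length=16, order='nchw')
# t has shape (16, C, H, W); for a uint8 RGB video the values are scaled to [0,1]
```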
"""
try_import('torch'); import torch
frames = self.load().numpy() if self.load().numpy().ndim == 4 else np.expand_dims(self.load().numpy(), 3) # NxHxWx(C=1, C=3)
assert boundary in ['repeat', 'strict', 'cyclic'], "Invalid boundary mode - must be in ['repeat', 'strict', 'cyclic']"
# Slice index (i=start (zero-indexed), j=end (non-inclusive), k=step)
(i,j,k) = (startframe, endframe, stride)
if startframe == 'random':
assert length is not None, "Random start frame requires fixed length"
i = max(0, np.random.randint(len(frames)-length+1))
if endframe is not None:
assert length is None, "Cannot specify both endframe and length"
assert endframe > startframe, "End frame must be greater than start frame"
(j,k) = (endframe, 1)
if length is not None:
assert endframe is None, "Cannot specify both endframe and length"
assert length >= 0, "Length must be positive"
(j,k) = (i+length, 1)
if length is None and endframe is None:
j = len(frames) # use them all
if stride != 1:
assert take is None, "Cannot specify both take and stride"
assert stride >= 1, "Stride must be >= 1"
k = stride
if take is not None:
# Uniformly sampled frames to result in len(frames)=take
assert stride == 1, "Cannot specify both take and stride"
assert take <= len(frames), "Take must be less than the number of frames"
k = int(np.ceil(len(frames)/float(take)))
# Boundary handling
assert i >= 0, "Start frame must be >= 0"
assert i < j, "Start frame must be less than end frame"
assert k <= len(frames), "Stride must be <= len(frames)"
n = len(frames) # true video length for labels
if boundary == 'repeat' and j > len(frames):
for d in range(j-len(frames)):
frames = np.concatenate( (frames, np.expand_dims(frames[-1], 0) ))
elif boundary == 'cyclic' and j > len(frames):
for d in range(j-len(frames)):
frames = np.concatenate( (frames, np.expand_dims(frames[j % len(frames)], 0) ))
assert j <= len(frames), "invalid slice=%s for frame shape=%s" % (str((i,j,k)), str(frames.shape))
if verbose:
print('[vipy.video.torch]: slice (start,end,step)=%s for frame shape (N,C,H,W)=%s' % (str((i,j,k)), str(frames.shape)))
# Slice and transpose to torch tensor axis ordering
t = torch.from_numpy(frames[i:j:k] if (k!=1 or i!=0 or j!=len(frames)) else frames) # do not copy - This shares the numpy buffer of the video, be careful!
if t.dim() == 2:
t = t.unsqueeze(0).unsqueeze(-1) # HxW -> (N=1)xHxWx(C=1)
if order == 'nchw':
t = t.permute(0,3,1,2) # NxCxHxW, view
elif order == 'nhwc':
pass # NxHxWxC (native numpy order)
elif order == 'cnhw' or order == 'cdhw':
t = t.permute(3,0,1,2) # CxNxHxW == CxDxHxW (for torch conv3d), view
elif order == 'chwn':
t = t.permute(3,1,2,0) # CxHxWxN, view
else:
raise ValueError("Invalid order = must be in ['nchw', 'nhwc', 'chwn', 'cnhw']")
# Scaling (optional)
if scale is not None and self.colorspace() != 'float':
t = (1.0/255.0)*t # [0,255] -> [0,1]
elif scale is not None and scale != 1.0:
t = scale*t
# Return tensor or (tensor, slice) or (tensor, labels)
if withslice:
return (t, (i,j,k))
elif withlabel:
labels = [sorted(tuple(self.activitylabels( (f%n) if boundary == 'cyclic' else min(f, n-1) ))) for f in range(i,j,k)]
return (t, labels)
elif nonelabel:
return (t, None)
else:
return t
def clone(self, flushforward=False, flushbackward=False, flush=False, flushfilter=False, rekey=False, flushfile=False, shallow=False, sharedarray=False, sanitize=True):
"""Create deep copy of video object, flushing the original buffer if requested and returning the cloned object.
Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned
object which can be used for encoding and will be garbage collected.
Args:
flushforward: copy the object, and set the cloned object `vipy.video.Video.array` to None. This flushes the video buffer for the clone, not the object
flushbackward: copy the object, and set the object array() to None. This flushes the video buffer for the object, not the clone.
flush: set the object array() to None and clone the object. This flushes the video buffer for both the clone and the object.
flushfilter: Set the ffmpeg filter chain to the default in the new object, useful for saving new videos
flushfile: Remove the filename and the URL from the video object. Useful for creating new video objects from loaded pixels.
rekey: Generate new unique track ID and activity ID keys for this scene
shallow: shallow copy everything (copy by reference), except for ffmpeg object. attributes dictionary is shallow copied
sharedarray: deep copy of everything, except for pixel buffer which is shared. Changing the pixel buffer on self is reflected in the clone.
sanitize: remove private attributes from self.attributes dictionary. A private attribute is any key with two leading underscores '__' which should not be propagated to clone
Returns:
A deepcopy of the video object such that changes to self are not reflected in the copy
.. note:: Cloning videos is an expensive operation and can slow down real time code. Use sparingly.
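For example (a minimal sketch):
```python
v2 = v.clone(flushforward=True)  # v keeps its pixel buffer, v2 is flushed
v3 = v.clone(shallow=True)       # copy by reference, except for the ffmpeg filter chain
```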
"""
if sanitize:
a = self.attributes # copy reference to attributes to restore
self.attributes = {} # remove attributes on self for fast clone() since private attributes will be filtered anyway
if flush or (flushforward and flushbackward):
self._array = None # flushes buffer on object and clone
#self._previewhash = None
self._shape = None
v = copy.deepcopy(self) # object and clone are flushed
elif flushbackward:
v = copy.deepcopy(self) # propagates _array to clone
self._array = None # object flushed, clone not flushed
#self._previewhash = None
self._shape = None
elif flushforward:
array = self._array;
self._array = None
#self._previewhash = None
self._shape = None
v = copy.deepcopy(self) # does not propagate _array to clone
self._array = array # object not flushed
v._array = None # clone flushed
elif shallow:
v = copy.copy(self) # shallow copy
v._ffmpeg = copy.deepcopy(self._ffmpeg) # except for ffmpeg object
v.attributes = {k:v for (k,v) in self.attributes.items()} # shallow copy of keys
v._array = np.asarray(self._array) if self._array is not None else None # shared pixels
elif sharedarray:
array = self._array
self._array = None
v = copy.deepcopy(self) # deep copy of everything but pixels
v._array = np.asarray(array) if array is not None else None # shared pixels
self._array = array # restore
else:
v = copy.deepcopy(self)
if flushfilter:
v._ffmpeg = ffmpeg.input(v.filename()) # no other filters
#v._previewhash = None
v._shape = None
(v._startframe, v._endframe) = (None, None)
(v._startsec, v._endsec) = (None, None)
if rekey:
v.rekey()
if flushfile:
v.nofilename().nourl()
if sanitize:
self.attributes = a # restore attributes
v.attributes = {k:v for (k,v) in self.attributes.items()} # shallow copy
v.sanitize() # remove private attributes
return v
def flush(self):
"""Alias for clone(flush=True), returns self not clone"""
self._array = None # flushes buffer on object and clone
self.delattribute("__stream_buffer") # remove if present
#self._previewhash = None
self._shape = None
return self
def returns(self, r=None):
"""Return the provided value, useful for terminating long fluent chains without returning self"""
return r
def flush_and_return(self, retval):
"""Flush the video and return the parameter supplied, useful for long fluent chains"""
self.flush()
return retval
def map(self, func):
"""Apply lambda function to the loaded numpy array img, changes pixels not shape
Lambda function must have the following signature:
* newimg = func(img)
* img: HxWxC numpy array for a single frame of video
* newimg: HxWxC modified numpy array for this frame. Change only the pixels, not the shape
The lambda function will be applied to every frame in the video in frame index order.
"""
assert isinstance(func, types.LambdaType), "Input must be lambda function with np.array() input and np.array() output"
oldimgs = self.load().array()
self.array(np.stack([func(img) for img in oldimgs])) # apply func to each HxWxC frame in frame index order, per the documented signature
if (any([oldimg.dtype != newimg.dtype for (oldimg, newimg) in zip(oldimgs, self.array())]) or
any([oldimg.shape != newimg.shape for (oldimg, newimg) in zip(oldimgs, self.array())])):
self.colorspace('float') # unknown colorspace after shape or type transformation, set generic
return self
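For example, a pixelwise inversion that changes pixels but not shape or dtype (a sketch, assuming a loaded uint8 video v):
```python
v = v.map(lambda img: 255 - img)   # img is one HxWxC frame; applied to every frame in order
```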
def gain(self, g):
"""Pixelwise multiplicative gain, such that each pixel p_{ij} = g * p_{ij}"""
return self.normalize(0, 1, scale=g)
def bias(self, b):
"""Pixelwise additive bias, such that each pixel p_{ij} = b + p_{ij}"""
return self.normalize(mean=0, std=1, scale=1.0, bias=b)
def float(self):
self.load()
self._array = self._array.astype(np.float32) if self._array is not None else self._array
return self
def channel(self, c):
self.load()
assert c >= 0 and c < self.channels()
self._array = self._array[:,:,:,c] if self._array is not None else self._array
return self
def normalize(self, mean, std, scale=1, bias=0):
"""Pixelwise whitening, out = ((scale*in) - mean) / std); triggers load(). All computations float32"""
assert scale >= 0, "Invalid input"
assert all([s > 0 for s in tolist(std)]), "Invalid input"
self._array = vipy.math.normalize(self._array, np.array(mean, dtype=np.float32), np.array(std, dtype=np.float32), np.float32(scale))
if bias != 0:
self._array = self._array + np.array(bias, dtype=np.float32)
return self.colorspace('float')
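As a sketch of how gain, bias and normalize compose (the mean/std values below are illustrative, not library defaults):
```python
v = v.load().float()
v = v.normalize(mean=[123.7, 116.3, 103.5], std=[58.4, 57.1, 57.4])  # per-channel whitening to float32
v = v.gain(0.5).bias(0.25)   # p_ij -> 0.5*p_ij, then p_ij -> p_ij + 0.25
```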
def setattribute(self, k, v=None):
if self.attributes is None:
self.attributes = {}
self.attributes[k] = v
return self
def _has_private_attribute(self):
"""Does the attributes dictionary contain any private attributes (e.g. those keys prepended with '__')"""
return isinstance(self.attributes, dict) and any([k.startswith('__') for k in self.attributes.keys()])
def hasattribute(self, k):
"""Does the attributes dictionary (self.attributes) contain the provided key"""
return isinstance(self.attributes, dict) and k in self.attributes
def delattribute(self, k):
if k in self.attributes:
self.attributes.pop(k)
return self
def clearattributes(self):
"""Remove all attributes"""
self.attributes = {}
return self
def getattribute(self, k):
"""Return the key k in the attributes dictionary (self.attributes) if present, else None"""
return self.attributes[k] if k in self.attributes else None
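A short sketch of the attribute helpers (key names are illustrative); keys with a leading '__' are treated as private and are stripped by sanitize():
```python
v.setattribute('source', 'camera3').setattribute('__cache_key', 'abc')  # fluent; '__cache_key' is private
assert v.hasattribute('source') and v.getattribute('missing') is None
v.delattribute('__cache_key')   # or rely on clone(sanitize=True) to strip private keys
```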
class VideoCategory(Video):
"""vipy.video.VideoCategory class
A VideoCategory is a video with associated category, such as an activity class. This class includes all of the constructors of vipy.video.Video
along with the ability to extract a clip based on frames or seconds.
"""
def __init__(self, filename=None, url=None, framerate=30.0, attributes=None, category=None, array=None, colorspace=None, startframe=None, endframe=None, startsec=None, endsec=None):
super().__init__(url=url, filename=filename, framerate=framerate, attributes=attributes, array=array, colorspace=colorspace,
startframe=startframe, endframe=endframe, startsec=startsec, endsec=endsec)
self._category = category
@classmethod
def from_json(cls, s):
d = json.loads(s) if not isinstance(s, dict) else s
d = {k.lstrip('_'):v for (k,v) in d.items()} # prettyjson (remove "_" prefix to attributes)
v = super().from_json(s)
v._category = d['category']
return v
def __repr__(self):
strlist = []
if self.isloaded():
strlist.append("height=%d, width=%d, frames=%d" % (self._array[0].shape[0], self._array[0].shape[1], len(self._array)))
if self.filename() is not None:
strlist.append('filename="%s"' % self.filename())
if self.hasurl():
strlist.append('url="%s"' % self.url())
if self.category() is not None:
strlist.append('category="%s"' % self.category())
if not self.isloaded() and self._startframe is not None and self._endframe is not None:
strlist.append('clip=(%d,%d)' % (self._startframe, self._endframe))
if not self.isloaded() and self._startframe is not None and self._endframe is None:
strlist.append('clip=(%d,)' % (self._startframe))
return str('<vipy.video.VideoCategory: %s>' % (', '.join(strlist)))
def json(self, encode=True):
d = json.loads(super().json())
d['category'] = self._category
return json.dumps(d) if encode else d
def category(self, c=None):
if c is None:
return self._category
else:
self._category = c
return self
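A minimal construction and JSON round trip (the filename and category are hypothetical):
```python
v = vipy.video.VideoCategory(filename='/path/to/video.mp4', category='tennis')
assert vipy.video.VideoCategory.from_json(v.json()).category() == 'tennis'
```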
class Scene(VideoCategory):
""" vipy.video.Scene class
The vipy.video.Scene class provides a fluent, lazy interface for representing, transforming and visualizing annotated videos.
The following constructors are supported:
```python
vid = vipy.video.Scene(filename='/path/to/video.ext')
```
Valid video extensions are those that are supported by ffmpeg ['.avi','.mp4','.mov','.wmv','.mpg','.mkv','.webm'].
```python
vid = vipy.video.Scene(url='https://www.youtube.com/watch?v=MrIN959JuV8')
vid = vipy.video.Scene(url='http://path/to/video.ext', filename='/path/to/video.ext')
```
Youtube URLs are downloaded to a temporary filename, retrievable as vid.download().filename(). If the environment
variable 'VIPY_CACHE' is defined, then videos are saved to this directory rather than the system temporary directory.
If a filename is provided to the constructor, then that filename will be used instead of a temp or cached filename.
URLs can be defined as an absolute URL to a video file, or to a site supported by 'youtube-dl'
[https://ytdl-org.github.io/youtube-dl/supportedsites.html]
```python
vid = vipy.video.Scene(array=frames, colorspace='rgb')
```
The input 'frames' is an NxHxWx3 numpy array corresponding to an N-length list of HxWx3 uint8 numpy arrays, each a single frame of pre-loaded video
Note that the video transformations (clip, resize, rescale, rotate) are only available prior to load(), and the array() is assumed immutable after load().
```python
vid = vipy.video.Scene(array=greyframes, colorspace='lum')
```
The input 'greyframes' is an NxHxWx1 numpy array corresponding to an N-length list of HxWx1 uint8 numpy arrays, each a single frame of pre-loaded video
This corresponds to the luminance channel of an RGB colorspace
```python
vid = vipy.video.Scene(array=greyframes, colorspace='lum', tracks=tracks, activities=activities)
```
- tracks = [vipy.object.Track(), ...]
- activities = [vipy.object.Activity(), ...]
The inputs are lists of tracks and/or activities. An object is a spatial bounding box with a category label. A track is a spatiotemporal bounding
box with a category label, such that the box contains the same instance of an object. An activity is one or more tracks with a start and end frame for an
activity performed by the object instances. Track and activity timing must be relative to the start frame of the Scene() constructor.
"""
def __init__(self, filename=None, url=None, framerate=30.0, array=None, colorspace=None, category=None, tracks=None, activities=None,
attributes=None, startframe=None, endframe=None, startsec=None, endsec=None):
self._tracks = {}
self._activities = {}
super().__init__(url=url, filename=filename, framerate=framerate, attributes=attributes, array=array, colorspace=colorspace,
category=category, startframe=startframe, endframe=endframe, startsec=startsec, endsec=endsec)
# Tracks must be defined relative to the clip specified by this constructor
if tracks is not None:
tracks = tracks if isinstance(tracks, list) or isinstance(tracks, tuple) else [tracks] # canonicalize
assert all([isinstance(t, vipy.object.Track) for t in tracks]), "Invalid track input; tracks=[vipy.object.Track(), ...]"
self._tracks = {t.id():t for t in tracks}
# Activities must be defined relative to the clip specified by this constructor
if activities is not None:
activities = activities if isinstance(activities, list) or isinstance(activities, tuple) else [activities] # canonicalize
assert all([isinstance(a, vipy.activity.Activity) for a in activities]), "Invalid activity input; activities=[vipy.activity.Activity(), ...]"
self._activities = {a.id():a for a in activities}
self._currentframe = None # deprecated
@classmethod
def cast(cls, v, flush=False):
"""Cast a conformal vipy object to this class. This is useful for downcast and upcast conversion of video objects."""
assert isinstance(v, vipy.video.Video), "Invalid input - must be derived from vipy.video.Video"
if v.__class__ != vipy.video.Scene:
v.__class__ = vipy.video.Scene
v._tracks = {} if flush or not hasattr(v, '_tracks') else v._tracks
v._activities = {} if flush or not hasattr(v, '_activities') else v._activities
v._category = None if flush or not hasattr(v, '_category') else v._category
return v
@classmethod
def asjson(cls, s):
"""Restore an object serialized with self.json(). Alas for `vipy.video.Scene.from_json`.
Usage:
```python
vs = vipy.video.Scene.asjson(v.json())
```
"""
return vipy.video.Scene.from_json(s)
@classmethod
def from_json(cls, s):
"""Restore an object serialized with self.json()
Usage:
```python
vs = vipy.video.Scene.from_json(v.json())
```
"""
d = json.loads(s) if not isinstance(s, dict) else s
d = {k.lstrip('_'):v for (k,v) in d.items()} # prettyjson (remove "_" prefix to attributes)
v = super().from_json(s)
# Packed attribute storage:
# - When loading a large number of vipy objects, the python garbage collector slows down significantly due to reference cycle counting
# - Mutable objects and custom containers are tracked by the garbage collector and the more of them that are loaded the longer GC takes
# - To avoid this, load attributes as tuples of packed strings. This is an immutable type that is not reference counted. Check this with gc.is_tracked()
# - Then, unpack the attributes on demand when accessing tracks() or activities(). Then, the nested containers are reference counted (even though they really should not be, since there are no cycles by construction)
# - This is useful when calling vipy.util.load(...) on archives that contain hundreds of thousands of objects
# - Do not access the private attributes self._tracks and self._activities directly, as they will remain packed until needed
# - Should install ultrajson (pip install ujson) for super fast parsing
v._tracks = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['tracks'].values()]) # track ID key is embedded in object, legacy unpack of doubly JSON encoded strings (vipy-1.11.16)
v._activities = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['activities'].values()]) # track ID key is embedded in object, legacy unpack of doubly JSON encoded strings (vipy-1.11.16)
return v
def pack(self):
"""Packing a scene returns the scene with the annotations JSON serialized.
- This is useful for fast garbage collection when there are many objects in memory
- This is useful for distributed processing prior to serializing from a scheduler to a client
- This is useful for lazy deserialization of complex attributes when loading many videos into memory
- Unpacking is transparent to the end user and is performed on the fly when annotations are accessed. There is no unpack() method.
- See the notes in from_json() for why this helps with nested containers and reference cycle tracking with the python garbage collector
"""
d = json.loads(self.json())
self._tracks = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['tracks'].values()]) # efficient garbage collection: store as a packed string to avoid reference cycle tracking, unpack on demand
self._activities = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['activities'].values()]) # efficient garbage collection: store as a packed string to avoid reference cycle tracking, unpack on demand
return self
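A sketch of the intended workflow when serializing many scenes (assuming vipy.util.save exists as the counterpart to the vipy.util.load mentioned above; the archive path is hypothetical):
```python
scenes = [v.pack() for v in scenes]    # annotations stored as packed JSON strings, GC-friendly
vipy.util.save(scenes, 'dataset.pkl')  # hypothetical archive path
v = vipy.util.load('dataset.pkl')[0]
tracks = v.tracklist()                 # transparent on-demand unpack
```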
def __repr__(self):
strlist = []
if self.isloaded():
strlist.append("height=%d, width=%d, frames=%d, color=%s" % (self.height(), self.width(), len(self._array), self.colorspace()))
if self.filename() is not None:
strlist.append('filename="%s"' % (self.filename()))
if self.hasurl():
strlist.append('url="%s"' % self.url())
if self._framerate is not None:
strlist.append('fps=%1.1f' % float(self._framerate))
if not self.isloaded() and self._startframe is not None and self._endframe is not None:
strlist.append('clip=(%d,%d)' % (self._startframe, self._endframe))
if not self.isloaded() and self._startframe is not None and self._endframe is None:
strlist.append('clip=(%d,)' % (self._startframe))
if self.category() is not None:
strlist.append('category="%s"' % self.category())
if self.hastracks():
strlist.append('tracks=%d' % len(self._tracks))
if self.hasactivities():
strlist.append('activities=%d' % len(self._activities))
return str('<vipy.video.scene: %s>' % (', '.join(strlist)))
def instanceid(self, newid=None):
"""Return an annotation instance identifier for this video.
An instance ID is a unique identifier for a ground truth annotation within a video, either a track or an activity. More than one instance ID may share the same video ID if they are from the same source videofile.
This is useful when calling `vipy.video.Scene.activityclip` or `vipy.video.Scene.activitysplit` to clip a video into segments such that each clip has a unique identifier, but all share the same underlying `vipy.video.Video.videoid`.
This is useful when calling `vipy.video.Scene.trackclip` or `vipy.video.Scene.tracksplit` to clip a video into segments such that each clip has a unique identifier, but all share the same underlying `vipy.video.Video.videoid`.
Returns:
INSTANCEID: if 'instance_id' key is in self.attributes
VIDEOID_INSTANCEID: if '_instance_id' key is in self.attributes, as set by activityclip() or trackclip(). This is set using INSTANCEID=ACTIVITYID_ACTIVITYINDEX or INSTANCEID=TRACKID_TRACKINDEX, where the index is the temporal order of the annotation in the source video prior to clip().
VIDEOID_ACTIVITYINDEX: if 'activityindex' key is in self.attributes, as set by activityclip() (fallback for legacy datasets).
VIDEOID: otherwise
"""
if newid is not None:
self.setattribute('instance_id', newid)
return self
else:
if 'instance_id' in self.attributes:
return self.attributes['instance_id'] # set at video creation time (e.g. pycollector)
elif '_instance_id' in self.attributes:
return self.attributes['_instance_id'] # set at activityclip() time for provenance from clips back to videos
elif 'activityindex' in self.attributes:
return '%s_%s' % (self.videoid(), str(self.attributes['activityindex'])) # set at activityclip() time for provenance from clips back to videos (deprecated)
else:
return self.videoid()
def frame(self, k=0, img=None, noimage=False, t=None):
"""Return `vipy.image.Scene` object at frame k
- The attributes of each of the `vipy.image.Scene.objects` in the scene contain helpful metadata for the provenance of the detection, including:
- 'trackid' of the track for this detection
- 'activityid' associated with this detection
- 'jointlabel' of this detection, used for visualization
- 'noun verb' of this detection, used for visualization
Args:
k: [int >= 0] The frame index requested. This is relative to the current frame rate of the video.
img: [numpy, None] An optional image to be used for this frame. This is useful to construct frames efficiently for videos if the pixel buffer is already available from a stream rather than a preview.
noimage: [bool] If True, then return only annotations at frame k with an empty frame buffer (e.g. no image pixels in the returned image object)
t: [float >= 0] The frame time requested. This is converted into a frame index using the current framerate of the video.
Return:
A `vipy.image.Scene` object for frame k containing all objects in this frame, with pixels from img if provided, from the loaded array, or from a preview of frame k otherwise
.. note::
- Modifying this frame will not affect the source video
- If multiple objects are associated with an activity and a primary actor is defined, then only the primary actor is displayed as "Noun Verbing", objects are shown as "Noun" with the activityid in the attribute
- If noun is associated with more than one activity, then this is shown as "Noun Verbing1\nNoun Verbing2", with a newline separator
"""
k = int(self.framerate()*t) if t is not None else k
assert isinstance(k, int) and k>=0, "Frame index must be non-negative integer"
assert img is not None or (self.isloaded() and k<len(self)) or not self.isloaded(), "Invalid frame index %d - Indexing video by frame must be integer within (0, %d)" % (k, len(self)-1)
img = img if (img is not None or noimage) else (self._array[k] if self.isloaded() else self.preview(k).array())
dets = [t[k].clone(deep=True).setattribute('trackindex', j) for (j, t) in enumerate(self.tracklist()) if len(t)>0 and (t.during(k) or t.boundary()=='extend')] # track interpolation (cloned) with boundary handling
for d in dets:
d.attributes['activityid'] = [] # reset
jointlabel = [(d.shortlabel(),'')] # [(Noun, Verbing1), (Noun, Verbing2), ...], initialized with empty verbs as [(Noun, ""), ... ]
activityconf = [None] # for display
for (aid, a) in self.activities().items(): # insertion order: First activity is primary, next is secondary (not in confidence order)
if a.hastrack(d.attributes['trackid']) and a.during(k):
# Display assumptions:
# - Jointlabel is always displayed as "Noun Verbing" during activity (e.g. Person Carrying, Vehicle Turning) using noun=track shortlabel, verb=activity shortlabel
# - If noun is associated with more than one activity, then this is shown as "Noun Verbing1\nNoun Verbing2", with a newline separator
# - If multiple objects are associated with an activity and a primary actor is defined, then only the primary actor is displayed as "Noun Verbing", objects are shown as "Noun" with the activityid in the attributes
if (a.actorid() is None or (a.actorid() == d.attributes['trackid'])) and not any([a.shortlabel() == v for (n,v) in jointlabel]):
jointlabel.append( (d.shortlabel(), a.shortlabel()) ) # only show each activity once (even if repeated)
activityconf.append(a.confidence())
d.attributes['activityid'].append(a.id()) # for activity correspondence (if desired)
# For display purposes
# - See `vipy.image.mutator_show_trackindex_verbonly`
# - Double prepended underscore attributes are private and cleaned using `vipy.image.Image.sanitize`
d.attributes['__jointlabel'] = '\n'.join([('%s %s' % (n,v)).strip() for (n,v) in jointlabel[0 if len(jointlabel)==1 else 1:]]) # to be deprecated
d.attributes['__noun verb'] = jointlabel[0 if len(jointlabel)==1 else 1:]
d.attributes['__activityconf'] = activityconf[0 if len(jointlabel)==1 else 1:]
d.attributes['__trackindex'] = d.attributes['trackindex'] # trackindex to be replaced with __trackindex
d.attributes['__trackid'] = d.attributes['trackid'] # trackid to be replaced with __trackid
d.attributes['__activityid'] = d.attributes['activityid'] # activityid to be replaced with __activityid
dets.sort(key=lambda d: (d.confidence() if d.confidence() is not None else 0, d.shortlabel())) # layering in video is ordered by decreasing track confidence and alphabetical shortlabel
return vipy.image.Scene(array=img, colorspace=self.colorspace(), objects=dets, category=self.category())
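A sketch of accessing the interpolated detections and the provenance attributes described above:
```python
im = v.frame(10)            # vipy.image.Scene at frame 10
for d in im.objects():      # interpolated detections for this frame
    print(d.category(), d.attributes['__trackid'], d.attributes['__activityid'])
```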
def during(self, frameindex):
try:
self.__getitem__(frameindex) # triggers load
return True
except:
return False
def labeled_frames(self):
"""Iterate over frames, yielding tuples (activity+object labelset in scene, vipy.image.Scene())"""
self.load()
for k in range(0, len(self)):
#self._currentframe = k # used only for incremental add()
yield (self.labels(k), self.__getitem__(k))
#self._currentframe = None
def framecomposite(self, n=2, dt=10, mindim=256):
"""Generate a single composite image with minimum dimension mindim as the uniformly blended composite of n frames each separated by dt frames"""
if not self.isloaded():
self.mindim(mindim).load()
imframes = [self.frame(k).maxmatte() for k in range(0, dt*n, dt)]
img = np.uint8(np.sum([1/float(n)*im.array() for im in imframes], axis=0))
return imframes[0].clone().array(img)
def isdegenerate(self):
"""Degenerate scene has empty or malformed tracks"""
return len(self.tracklist()) == 0 or any([t.isempty() or t.isdegenerate() for t in self.tracklist()])
def quicklook(self, n=9, dilate=1.5, mindim=256, fontsize=10, context=False, startframe=0, animate=False, dt=30, thumbnail=None):
"""Generate a montage of n uniformly spaced annotated frames centered on the union of the labeled boxes in the current frame to show the activity ocurring in this scene at a glance
Montage increases rowwise for n uniformly spaced frames, starting from frame zero and ending on the last frame. This quicklook is most useful when len(self.activities()==1)
for generating a quicklook from an activityclip().
Args:
n: [int]: Number of images in the quicklook
dilate: [float]: The dilation factor for the bounding box prior to crop for display
mindim: [int]: The minimum dimension of each of the elements in the montage
fontsize: [int]: The size of the font for the bounding box label
context: [bool]: If true, replace the first and last frame in the montage with the full frame annotation, to help show the scale of the scene
animate: [bool]: If true, return a video constructed by animating the quicklook into a video by showing dt consecutive frames
dt: [int]: The number of frames for animation
startframe: [int]: The initial frame index to start the n uniformly sampled frames for the quicklook
thumbnail [`vipy.image.Image`]: If provided, prepend the first element in the montage with this thumbnail. This is useful for showing a high resolution image (e.g. a face, small object) to be contained in the video for review.
"""
if not self.isloaded():
self.load() # triggers load() into memory, user should self.flush() to free
if animate:
return Video(frames=[self.quicklook(n=n, dilate=dilate, mindim=mindim, fontsize=fontsize, context=context, startframe=k, animate=False, dt=dt, thumbnail=thumbnail) for k in range(0, min(dt, len(self)))], framerate=self.framerate())
f_mutator = vipy.image.mutator_show_jointlabel()
framelist = [min(int(np.round(f))+startframe, len(self)-1) for f in np.linspace(0, len(self)-1, n)]
isdegenerate = [self.frame(k).boundingbox() is None or self.frame(k).boundingbox().dilate(dilate).intersection(self.framebox(), strict=False).isdegenerate() for (j,k) in enumerate(framelist)]
imframes = [self.frame(k).maxmatte() # letterbox or pillarbox
if (isdegenerate[j] or (context is True and (j == 0 or j == (n-1)))) else
self.frame(k).padcrop(self.frame(k).boundingbox().dilate(dilate).imclipshape(self.width(), self.height()).maxsquare().int()).mindim(mindim, interp='nearest')
for (j,k) in enumerate(framelist)]
imframes = [f_mutator(im) for im in imframes] # show jointlabel from frame interpolation
imframes = [im.savefig(fontsize=fontsize, figure=1).rgb() for im in imframes] # temp storage in memory
if thumbnail is not None:
assert isinstance(thumbnail, vipy.image.Image)
imframes = [thumbnail.maxmatte().mindim(mindim)] + imframes
return vipy.visualize.montage(imframes, imgwidth=mindim, imgheight=mindim)
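A typical usage sketch, generating one quicklook per activity clip (saveas on the returned montage image is assumed from the vipy.image API):
```python
for (k, c) in enumerate(v.activityclip()):
    c.quicklook(n=9, context=True).saveas('quicklook_%d.jpg' % k)
```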
def tracks(self, tracks=None, id=None):
"""Return mutable dictionary of tracks,
Args:
tracks [dict]: If provided, replace track dictionary with provided track dictionary, and return self
id [str]: If provided, return just the track associated with the provided track id
Returns:
This object if tracks!=None, otherwise the requested track (if id!=None) or trackdict (tracks=None)
"""
if isinstance(self._tracks, tuple):
self._tracks = {t.id():t for t in [vipy.object.Track.from_json(json.loads(s)) for s in self._tracks]} # on-demand unpack (efficient garbage collection for large list of objects)
if tracks is None and id is None:
return self._tracks # mutable dict
elif id is not None:
return self._tracks[id]
elif isinstance(tracks, dict):
assert all([isinstance(t, vipy.object.Track) and k == t.id() for (k,t) in tracks.items()]), "Invalid input - Must be dictionary of vipy.object.Track"
self._tracks = tracks.copy() # shallow copy
return self
else:
assert all([isinstance(t, vipy.object.Track) for t in tolist(tracks)]), "Invalid input - Must be vipy.object.Track or list of vipy.object.Track"
self._tracks = {t.id():t for t in tolist(tracks)} # insertion order preserved (python >=3.6)
return self
def track(self, id):
return self.tracks(id=id)
def trackindex(self, id):
"""Return the index in the tracklist of the track with the provided track id"""
assert id in self.tracks()
return [t.id() for t in self.tracklist()].index(id)
def trackidx(self, idx):
"""Return the track at the specified index in the tracklist"""
return self.tracklist()[idx]
def activity(self, id=None):
return self.activities(id=id) if id is not None else self.primary_activity()
def next_activity(self, id):
"""Return the next activity just after the given activityid"""
assert id in self.activities()
A = self.activitylist()
k = [k for (k,a) in enumerate(A) if a.id() == id][0]
return A[k+1] if k<len(A)-1 else None
def prev_activity(self, id):
"""Return the previous activity just before the given activityid"""
assert id in self.activities()
A = self.activitylist()
k = [k for (k,a) in enumerate(A) if a.id() == id][0]
return A[k-1] if k>=1 else None
def tracklist(self):
return list(self.tracks().values()) # triggers shallow copy
def objects(self, casesensitive=True):
"""The objects in a scene are the unique categories of tracks"""
return sorted(list(set([t.category() if casesensitive else t.category().lower() for t in self.tracklist()])))
def actorid(self, id=None, fluent=False):
"""Return or set the actor ID for the video.
- The actor ID is the track ID of the primary actor in the scene. This is useful for assigning a role for activities that are performed by the actor.
- The actor ID is the track ID of the first track in the tracklist
Args:
id: [str] if not None, then use this track ID as the actor
fluent: [bool] If true, always return self. This is useful for those cases where the actorid being set is None.
Returns:
[id=None, fluent=False] the actor ID
[id is not None] The video with the actor ID set, only if the ID is found in the tracklist
.. note:: Not to be confused with biometric subject id. For videos collected with Visym Collector platform (https://visym.com/collector), the biometric subject ID can be retrieved via `vipy.video.Video.metadata` (e.g. self.metadata()['subject_ids']).
"""
if id is None:
return (next(iter(self.tracks().keys())) if len(self._tracks)>0 else None) if not fluent else self # Python >=3.6
elif id in self._tracks:
# Reorder tracks so that id is first
idlist = [id] + [ti for ti in self.tracks().keys() if ti != id]
self._tracks = {k:self.track(k) for k in idlist}
else:
warnings.warn('trackid=%s not found in "%s"' % (str(id), str(self)))
return self
def setactorid(self, id):
"""Alias for `vipy.video.Scene.actorid`"""
return self.actorid(id, fluent=True)
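A sketch of actor access and reordering (my_track_id is a hypothetical track ID already present in the tracklist):
```python
tid = v.actorid()            # track ID of the primary actor (first track), or None
v.setactorid(my_track_id)    # reorder so this track is first; warns if the ID is not found
assert v.actor().id() == my_track_id
```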
def actor(self):
"""Return the primary actor (first `vipy.object.Track`) in the video"""
return next(iter(self.tracks().values())) if len(self._tracks)>0 else None # Python >=3.6
def primary_activity(self):
"""Return the primary activity of the video.
- The primary activity is the first activity in the activitylist.
- This is useful for activityclip() videos that are centered on a single activity
Returns:
`vipy.activity.Activity` that is first in the `vipy.video.Scene.activitylist`
"""
return next(iter(self.activities().values())) if len(self._activities)>0 else None # Python >=3.6
def first_activity(self):
"""Return the first activity of the video with the earliest start frame"""
return sorted(self.activitylist(), key=lambda a: a.startframe())[0] if len(self._activities)>0 else None
def last_activity(self):
"""Return the last activity of the video with the latest end frame"""
return sorted(self.activitylist(), key=lambda a: a.endframe())[-1] if len(self._activities)>0 else None
def activities(self, activities=None, id=None):
"""Return mutable dictionary of activities. All temporal alignment is relative to the current clip()."""
if isinstance(self._activities, tuple):
self._activities = {a.id():a for a in [vipy.activity.Activity.from_json(json.loads(s)) for s in self._activities]} # on-demand
if activities is None and id is None:
return self._activities # mutable dict
elif id is not None:
return self._activities[id]
elif isinstance(activities, dict):
assert all([isinstance(a, vipy.activity.Activity) and k == a.id() for (k,a) in activities.items()]), "Invalid input - Must be dictionary of vipy.activity.Activity"
self._activities = activities.copy() # shallow copy
return self
else:
assert all([isinstance(a, vipy.activity.Activity) for a in tolist(activities)]), "Invalid input - Must be vipy.activity.Activity or list of vipy.activity.Activity"
self._activities = {a.id():a for a in tolist(activities)} # insertion order preserved (python >=3.6)
return self
def activityindex(self, k):
"""Return the `vipy.activity.Activity` at the requested index order in the video"""
alist = self.activitylist()
assert k >= 0 and k < len(alist), "Invalid index"
return alist[k]
def activitylist(self):
"""Return a list of activities in the video, returned in insertion order.
Returns:
A list of `vipy.activity.Activity` insertion ordered into the original video
.. note:: The order of the activitylist() will not match the order of activityclip(), which is sorted by activity startframe. To match, use sorted(activitylist, key=lambda a: a.startframe())
"""
return list(self.activities().values()) # insertion ordered (python >=3.6), triggers shallow copy
def activityfilter(self, f):
"""Apply boolean lambda function f to each activity and keep activity if function is true, remove activity if function is false
Filter out all activities longer than 128 frames
```python
vid = vid.activityfilter(lambda a: len(a)<128)
```
Filter out activities with category in set
```python
vid = vid.activityfilter(lambda a: a.category() in set(['category1', 'category2']))
```
Args:
f: [lambda] a lambda function that takes an activity and returns a boolean
Returns:
This video with the activities f(a)==False removed.
"""
assert callable(f)
self._activities = {k:a for (k,a) in self.activities().items() if f(a) == True}
return self
def trackfilter(self, f, activitytrack=True):
"""Apply lambda function f to each object and keep if filter is True.
Args:
activitytrack: [bool] If true, remove track assignment from activities also, may result in activities with no tracks
f: [lambda] The lambda function to apply to each track t, and if f(t) returns True, then keep the track
Returns:
self, with tracks removed in-place
.. note:: Applying track filter with activitytrack=True may result in activities with no associated tracks. You should follow up with self.activityfilter(lambda a: len(a.trackids()) > 0).
"""
assert callable(f)
self._tracks = {k:t for (k,t) in self.tracks().items() if f(t) == True}
if activitytrack:
self.activitymap(lambda a: a.trackfilter(lambda ti: ti in self._tracks)) # remove track association in activities
#if any([len(a.tracks()) == 0 for a in self.activitylist()]):
# warnings.warn('trackfilter(..., activitytrack=True) removed tracks which returned at least one degenerate activity with no tracks')
return self
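The cleanup recommended in the note above, as a sketch:
```python
v = v.trackfilter(lambda t: t.category() == 'person')    # keep only tracks categorized 'person'
v = v.activityfilter(lambda a: len(a.trackids()) > 0)    # drop activities left with no tracks
```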
def trackmap(self, f, strict=True):
"""Apply lambda function f to each activity
-strict=True: enforce that lambda function must return non-degenerate Track() objects
"""
assert callable(f)
self._tracks = {k:f(t) for (k,t) in self.tracks().items()}
assert all([isinstance(t, vipy.object.Track) and (strict is False or not t.isdegenerate()) for (tk,t) in self.tracks().items()]), "Lambda function must return non-degenerate vipy.object.Track()"
return self
def activitymap(self, f):
"""Apply lambda function f to each activity"""
assert callable(f)
self._activities = {k:f(a) for (k,a) in self.activities().items()}
assert all([isinstance(a, vipy.activity.Activity) for a in self.activitylist()]), "Lambda function must return vipy.activity.Activity()"
return self
def rekey(self, tracks=None, activities=None):
"""Change the track and activity IDs to randomly assigned UUIDs. Useful for cloning unique scenes.
```python
v = vipy.video.RandomScene()
v.rekey() # randomly rekey all track and activity ID
v.rekey(tracks={...}) # rekey tracks (oldkey -> newkey) according to dictionary, randomly rekey activities
v.rekey(tracks={...}, activities={}) # rekey tracks according to dict, no change to activities
```
Args:
tracks [dict]: If not None, use this dictionary to remap oldkey->newkey for tracks. If None, use random keys. If empty dict, no change (do not rekey tracks)
activities [dict]: If not None, use this dictionary to remap oldkey->newkey for activities. If None, use random keys. If empty dict, no change (do not rekey activities)
Returns:
This object, with all track ID and activity ID rekeyed as specified. All actor IDs in activities will be updated.
"""
assert activities is None or isinstance(activities, dict) and all([k in self.activities() for k in activities.keys()])
assert tracks is None or isinstance(tracks, dict) and all([k in self.tracks() for k in tracks.keys()])
d_old_to_new = {k:hex(int(uuid.uuid4().hex[0:8], 16))[2:] for (k,a) in self.activities().items()} if activities is None else activities
self._activities = dict([(d_old_to_new[k], a.id(d_old_to_new[k])) if k in d_old_to_new else (k,a) for (k,a) in self.activities().items()])
d_old_to_new = {k:hex(int(uuid.uuid4().hex[0:8], 16))[2:] for (k,t) in self.tracks().items()} if tracks is None else tracks
self._tracks = dict([(d_old_to_new[k], t.id(d_old_to_new[k])) if k in d_old_to_new else (k,t) for (k,t) in self.tracks().items()])
for (k,v) in d_old_to_new.items():
self.activitymap(lambda a: a.replaceid(k,v) )
return self
def annotation(self):
"""Return an iterator over annotations in each frame.
```python
for y in self.annotation():
for (bb,a) in y:
print((bb,a))
```
Yields:
for each frame yield the tuple: ( (`vipy.object.Detection`, (tuple of `vipy.activity.Activity` performed by the actor in this bounding box)), ... )
.. note:: The preferred method for accessing annotations is a frame iterator, which includes pixels. However, this method provides access to just the annotations without pixels.
"""
endframe = max([a.endframe() for a in self.activitylist()]+[t.endframe() for (tk,t) in self.tracks().items()]) if (len(self._tracks) > 0 or len(self._activities) > 0) else 0
for k in range(0,endframe):
yield tuple( [tuple( [t[k] if t.during(k) else None, tuple( [a for a in self.activitylist() if a.during(k) and a.hastrackoverlap(t)] ) ]) for t in self.tracklist()])
def label(self):
"""Return an iterator over labels in each frame"""
endframe = max([a.endframe() for a in self.activitylist()]+[t.endframe() for (tk,t) in self.tracks().items()]) if (len(self._tracks) > 0 or len(self._activities) > 0) else 0
for k in range(0,endframe):
yield self.labels(k)
def labels(self, k=None):
"""Return a set of all object and activity labels in this scene, or at frame int(k)"""
return self.activitylabels(k).union(self.objectlabels(k))
def activitylabel(self, startframe=None, endframe=None):
"""Return an iterator over activity labels in each frame, starting from startframe and ending when there are no more activities"""
endframe = endframe if endframe is not None else (max([a.endframe() for a in self.activitylist()]) if len(self.activities())>0 else 0)
startframe = startframe if startframe is not None else (min([a.startframe() for a in self.activitylist()]) if len(self.activities())>0 else 0)
assert startframe <= endframe
for k in range(startframe, endframe):
yield self.activitylabels(k)
def activitylabels(self, startframe=None, endframe=None):
"""Return a set of all activity categories in this scene, or at startframe, or in range [startframe, endframe]"""
if startframe is None:
return set([a.category() for a in self.activities().values()])
elif startframe is not None and endframe is None:
return set([a.category() for a in self.activities().values() if a.during(startframe)])
elif startframe is not None and endframe is not None:
return [set([a.category() for a in self.activities().values() if a.during(k)]) for k in range(startframe, endframe)]
else:
raise ValueError('Invalid input - must specify both startframe and endframe, or only startframe')
def objectlabels(self, k=None, lower=False):
"""Return a python set of all activity categories in this scene, or at frame k.
Args:
k: [int] The object labels present at frame k. If k=None, then all object labels in the video
lower: [bool] If true, return the object labels in alll lower case for case invariant string comparisonsn
"""
return set([t.category() if not lower else t.category().lower() for t in self.tracks().values() if k is None or t.during(k)])
def categories(self):
"""Alias for labels()"""
return self.labels()
def activity_categories(self):
"""Alias for activitylabels()"""
return self.activitylabels()
def hasactivities(self):
"""Does this video have any activities?"""
return len(self._activities) > 0
def hasactivity(self, activityid):
"""Does this video have this activity id?"""
return activityid in self.activities()
def hastracks(self):
"""Does this video have any tracks?"""
return len(self._tracks) > 0
def hastrack(self, trackid):
"""Does the video have this trackid?
.. note:: Track IDs are available as vipy.object.Track().id()
"""
return trackid in self.tracks()
def add(self, obj, category=None, attributes=None, rangecheck=True, frame=None, fluent=False):
"""Add the object obj to the scene, and return an index to this object for future updates
This function is used to incrementally build up a scene frame by frame. Obj can be one of the following types:
- obj = vipy.object.Detection(), this must be called from within a frame iterator (e.g. for im in video) to get the current frame index
- obj = vipy.object.Track()
- obj = vipy.activity.Activity()
- obj = [xmin, ymin, width, height], with associated category kwarg, this must be called from within a frame iterator to get the current frame index
It is recommended that the objects are added as follows. For a v=vipy.video.Scene():
```python
for im in v:
# Do some processing on frame im to detect objects
(object_labels, xywh) = object_detection(im)
# Add them to the scene, note that each object instance is independent in each frame, use tracks for object correspondence
for (lbl,bb) in zip(object_labels, xywh):
v.add(bb, lbl)
# Do some correspondences to track objects
t2 = v.add( vipy.object.Track(...) )
# Update a previous track to add a keyframe
v.track(t2).add( ... )
```
The frame iterator will keep track of the current frame in the video and add the objects in the appropriate place. Alternatively,
```python
v.add(vipy.object.Track(..), frame=k)
```
Args:
obj: A conformal python object to add to the scene (`vipy.object.Detection`, `vipy.object.Track`, `vipy.activity.Activity`, [xmin, ymin, width, height])
category: Used if obj is an xywh tuple
attributes: Used only if obj is an xywh tuple
frame: [int] The frame to add the object
rangecheck: [bool] If true, check if the object is within the image rectangle and throw an exception if not. This requires introspecting the video shape using `vipy.video.Video.shape`.
fluent: [bool] If true, return self instead of the object index
"""
if isinstance(obj, vipy.object.Detection):
assert frame is not None, "add() for vipy.object.Detection() must be added during frame iteration (e.g. for im in video: )"
k = frame
if obj.hasattribute('trackid') and obj.attributes['trackid'] in self.tracks():
# The attribute "trackid" is set for a detection when interpolating a track at a frame. This is useful for reconstructing a track from previously enumerated detections
trackid = obj.attributes['trackid']
self.trackmap(lambda t: t.update(k, obj) if obj.attributes['trackid'] == t.id() else t)
return None if not fluent else self
else:
t = vipy.object.Track(category=obj.category(), keyframes=[k], boxes=[obj], boundary='strict', attributes=obj.attributes, trackid=obj.attributes['trackid'] if obj.hasattribute('trackid') else None, framerate=self.framerate())
if rangecheck and not obj.hasoverlap(width=self.width(), height=self.height()):
raise ValueError("Track '%s' does not intersect with frame shape (%d, %d)" % (str(t), self.height(), self.width()))
self.tracks()[t.id()] = t # by-reference
return t.id() if not fluent else self
elif isinstance(obj, vipy.object.Track):
if rangecheck and not obj.boundingbox().isinside(vipy.geometry.imagebox(self.shape())):
obj = obj.imclip(self.width(), self.height()) # try to clip it, will throw exception if all are bad
warnings.warn('[vipy.video.add]: Clipping trackid=%s track="%s" to image rectangle' % (str(obj.id()), str(obj)))
if obj.framerate() != self.framerate():
obj.framerate(self.framerate()) # convert framerate of track to framerate of video
self.tracks()[obj.id()] = obj # by-reference
return obj.id() if not fluent else self
elif isinstance(obj, vipy.activity.Activity):
if rangecheck and obj.startframe() >= obj.endframe():
raise ValueError("Activity '%s' has invalid (startframe, endframe)=(%d, %d)" % (str(obj), obj.startframe(), obj.endframe()))
self.activities()[obj.id()] = obj # by-reference, activity may have no tracks
return obj.id() if not fluent else self
elif (istuple(obj) or islist(obj)) and len(obj) == 4 and isnumber(obj[0]):
assert frame is not None, "add() for obj=xywh must be added at a specific frame"
t = vipy.object.Track(category=category, keyframes=[frame], boxes=[vipy.geometry.BoundingBox(xywh=obj)], boundary='strict', attributes=attributes, framerate=self.framerate())
if rangecheck and not t.boundingbox().isinside(vipy.geometry.imagebox(self.shape())):
t = t.imclip(self.width(), self.height()) # try to clip it, will throw exception if all are bad
warnings.warn('Clipping track "%s" to image rectangle' % (str(t)))
self.tracks()[t.id()] = t # by-reference
return t.id() if not fluent else self
else:
raise ValueError('Undefined object type "%s" to be added to scene - Supported types are obj in ["vipy.object.Detection", "vipy.object.Track", "vipy.activity.Activity", "[xmin, ymin, width, height]"]' % str(type(obj)))
def delete(self, id):
"""Delete a given track or activity by id, if present"""
return self.trackfilter(lambda t: t.id() != id).activityfilter(lambda a: a.id() != id)
def addframe(self, im, frame):
"""Add im=vipy.image.Scene() into vipy.video.Scene() at given frame. The input image must have been generated using im=self[k] for this to be meaningful, so that trackid can be associated"""
assert isinstance(im, vipy.image.Scene), "Invalid input - Must be vipy.image.Scene()"
assert im.shape() == self.shape(), "Frame input (shape=%s) must be same shape as video (shape=%s)" % (str(im.shape()), str(self.shape()))
# Copy framewise vipy.image.Scene() into vipy.video.Scene().
self.numpy()[frame] = im.array() # will trigger copy
for bb in im.objects():
self.trackmap(lambda t: t.update(frame, bb) if bb.attributes['trackid'] == t.id() else t)
return self
def clear(self):
"""Remove all activities and tracks from this object"""
self._activities = {}
self._tracks = {}
return self
def cleartracks(self):
self._tracks = {}
return self
def clearactivities(self):
self._activities = {}
return self
def replace(self, other, frame=None):
"""Replace tracks and activities with other if activity/track is during frame"""
assert isinstance(other, vipy.video.Scene)
self.activities([a for a in other.activitylist() if frame is None or a.during(frame)])
self.tracks([t for t in other.tracklist() if frame is None or t.during(frame)])
return self
def json(self, encode=True):
"""Return JSON encoded string of this object. This may fail if attributes contain non-json encodeable object"""
try:
json.loads(json.dumps(self.attributes)) # round trip for the attributes dictionary - this can be any arbitrary object and contents may not be json encodable
except:
raise ValueError('Video contains non-JSON encodable object in self.attributes dictionary - Try self.sanitize() or to clear with self.attributes = {} first')
d = json.loads(super().json())
d['tracks'] = {k:t.json(encode=False) for (k,t) in self.tracks().items()}
d['activities'] = {k:a.json(encode=False) for (k,a) in self.activities().items()}
try:
return json.dumps(d) if encode else d
except:
# Legacy support for non JSON serializable objects (<= vipy.1.9.2)
v = self.clone()
for (ti, t) in v.tracks().items():
for o in t._keyboxes:
vipy.geometry.BoundingBox.cast(o, flush=True)
o.float().significant_digits(2)
for (ai, a) in v.activities().items():
a._startframe = int(a._startframe)
a._endframe = int(a._endframe)
return v.json(encode=encode)
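A serialization round trip, sanitizing first in case private attributes are not JSON encodable (a sketch):
```python
s = v.clone(sanitize=True).json()    # raises ValueError if attributes are not JSON encodable
v2 = vipy.video.Scene.from_json(s)
```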
def csv(self, outfile=None):
"""Export scene to CSV file format with header. If there are no tracks, this will be empty. """
assert self.load().isloaded()
csv = [(self.filename(), # video filename
k, # frame number (zero indexed)
d.category(), d.shortlabel(), # track category and shortlabel (displayed in caption)
';'.join([self.activities(id=aid).category() for aid in tolist(d.attributes['activityid'])] if 'activityid' in d.attributes else ''), # semicolon separated activity category associated with track
d.xmin(), d.ymin(), d.width(), d.height(), # bounding box
d.attributes['trackid'], # globally unique track ID
';'.join([aid for aid in tolist(d.attributes['activityid'])] if 'activityid' in d.attributes else '')) # semicolon separated activity ID associated with track
for (k,im) in enumerate(self) for d in im.objects()]
csv = [('# video_filename', 'frame_number', 'object_category', 'object_shortlabel', 'activity categories(;)', 'xmin', 'ymin', 'width', 'height', 'track_id', 'activity_ids(;)')] + csv
return writecsv(csv, outfile) if outfile is not None else csv
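For example (the output path is hypothetical); note that this triggers a full load() and emits one row per detection per frame:
```python
v.csv('/tmp/annotations.csv')   # write the header plus one row per (frame, object)
rows = v.csv()                  # or return the rows in memory
```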
def framerate(self, fps=None):
"""Change the input framerate for the video and update frame indexes for all annotations.
```python
fps = self.framerate()
self.framerate(fps=15.0)
```
"""
if fps is None:
return self._framerate
elif float(fps) == self._framerate:
return self
else:
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
fps = float(fps)
self._startframe = int(round(self._startframe * (fps/self._framerate))) if self._startframe is not None else self._startframe # __repr__ only
self._endframe = int(round(self._endframe * (fps/self._framerate))) if self._endframe is not None else self._endframe # __repr__only
self._tracks = {k:t.framerate(fps) for (k,t) in self.tracks().items()}
self._activities = {k:a.framerate(fps) for (k,a) in self.activities().items()}
if 'fps=' in self._ffmpeg_commandline():
self._update_ffmpeg('fps', fps) # replace fps filter, do not add to it
else:
self._ffmpeg = self._ffmpeg.filter('fps', fps=fps, round='up') # create fps filter first time
self._framerate = fps
return self
def activitysplit(self, idx=None):
"""Split the scene into k separate scenes, one for each activity. Do not include overlapping activities.
Args:
idx: [int],[tuple],[list]. Return only those activities in the provided activity index list, where the activity index is the integer index of the activity in the video.
.. note:: This is useful for union()
"""
vid = self.clone(flushforward=True)
if any([(a.endframe()-a.startframe()) <= 0 for a in vid.activities().values()]):
warnings.warn('Filtering invalid activity with degenerate lengths: %s' % str([a for a in vid.activities().values() if (a.endframe()-a.startframe()) <= 0]))
activities = sorted([a.clone() for a in vid.activities().values() if (a.endframe()-a.startframe()) > 0], key=lambda a: a.startframe()) # only activities with at least one frame, sorted in temporal order
tracks = [ [t.clone() for (tid, t) in vid.tracks().items() if a.hastrack(t)] for a in activities] # tracks associated with each activity (may be empty)
vid._activities = {} # for faster clone
vid._tracks = {} # for faster clone
return [vid.clone()
.setattribute('_instance_id', ('%s_%d' % (vid.videoid(), k)) if not vid.hasattribute('_instance_id') else vid.getattribute('_instance_id'))
.activities(pa)
.tracks(t)
.setactorid(pa.actorid())
for (k,(pa,t)) in enumerate(zip(activities, tracks)) if idx is None or k in tolist(idx)]
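A sketch of splitting per activity:
```python
clips = v.activitysplit()          # one Scene per activity, in temporal order
first = v.activitysplit(idx=0)[0]  # only the activity with the earliest start frame
```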
def tracksplit(self):
"""Split the scene into k separate scenes, one for each track. Each scene starts at frame 0 and is a shallow copy of self containing exactly one track.
- This is useful for visualization by breaking a scene into a list of scenes that contain only one track.
- The attribute '_trackindex' is set in the attributes dictionary to provide provenance for the track relative to the source video
.. notes:: Use clone() to create a deep copy if needed.
"""
return [self.clone(shallow=True).setattribute('_trackindex', k).tracks(t).activityfilter(lambda a: a.hastrack(tk)) for (k,(tk,t)) in enumerate(self.tracks().items())]
def trackclip(self):
"""Split the scene into k separate scenes, one for each track. Each scene starts and ends when the track starts and ends"""
return [t.setattribute('_instance_id', '%s_%d_trackclip' % (t.videoid(), k)).clip(t.track(t.actorid()).startframe(), t.track(t.actorid()).endframe()) for (k,t) in enumerate(self.tracksplit())]
def activityclip(self, padframes=0, multilabel=True, idx=None, padto=None, padtosec=None):
"""Return a list of `vipy.video.Scene` objects each clipped to be temporally centered on a single activity, with an optional padframes before and after.
Args:
padframes: [int] for symmetric padding same before and after
padframes: [tuple] (int, int) for asymmetric padding before and after
padframes: [list[tuples]] [(int, int), ...] for activity specific asymmetric padding. See also padto.
multilabel: [bool] include overlapping multilabel secondary activities in each activityclip
idx: [int], [tuple], [list]. The indexes of the activities to return, where the index is the integer index order of the activity in the video. Useful for complex videos.
padto: [int] padding so that each activity clip is at least padto frames long, with symmetric padding around the activity.
padtosec: [float] padding so that each activity clip is at least padtosec seconds long, with symmetric padding around the activity.
Returns:
A list of `vipy.video.Scene` each cloned from the source video and clipped on one activity in the scene
.. notes::
- The Scene() category is updated to be the activity category of the clip, and only the objects participating in the activity are included.
- Clips are returned ordered in the temporal order they appear in the video.
- The returned vipy.video.Scene() objects for each activityclip are clones of the video, with the video buffer flushed.
- Each activityclip() is associated with each activity in the scene, and includes all other secondary activities that the objects in the primary activity also perform (if multilabel=True). See activityclip().labels().
- Calling activityclip() on activityclip(multilabel=True) will duplicate activities, due to the overlapping secondary activities being included in each clip with an overlap. Be careful!
"""
assert isinstance(padframes, int) or istuple(padframes) or islist(padframes)
vid = self.clone(flushforward=True)
if any([(a.endframe()-a.startframe()) <= 0 for a in vid.activities().values()]):
warnings.warn('Filtering invalid activity clips with degenerate lengths: %s' % str([a for a in vid.activities().values() if (a.endframe()-a.startframe()) <= 0]))
primary_activities = sorted([a.clone() for a in vid.activities().values() if (a.endframe()-a.startframe()) > 0], key=lambda a: a.startframe()) # only activities with at least one frame, sorted in temporal order
padframelist = [padframes if istuple(padframes) else (padframes, padframes) for k in range(len(primary_activities))] if not islist(padframes) else padframes
tracks = [ [t.clone() for (tid, t) in vid.tracks().items() if a.hastrackoverlap(t)] for a in primary_activities] # tracks associated with and temporally overlapping each primary activity (may be empty)
secondary_activities = [[sa.clone() for sa in primary_activities if (sa.id() != pa.id() and pa.clone().temporalpad((prepad, postpad)).hasoverlap(sa) and (len(T)==0 or any([sa.hastrack(t) for t in T])))] for (pa, T, (prepad,postpad)) in zip(primary_activities, tracks, padframelist)] # overlapping secondary activities that includes any track in the primary activity
secondary_activities = [sa if multilabel else [] for sa in secondary_activities]
vid._activities = {} # for faster clone
vid._tracks = {} # for faster clone
maxframes = self.duration_in_frames() if (padframes != 0 or padto is not None or padtosec is not None) else None
if padto is not None or padtosec is not None:
cliplist = [(a.startframe(), a.endframe()) for a in primary_activities]
padto = padto if padto is not None else int(round(padtosec*self.framerate()))
padframelist = [(sp+int(np.ceil(((padto-(ef-sf))/2))), ep+int(np.ceil(((padto-(ef-sf))/2)))) if (ef-sf)<padto else (sp,ep) for ((sp,ep),(sf,ef)) in zip(padframelist, cliplist)]
padframelist = [(0,ep+(-sp)) if (sp<0) else ((sp+(ep-(maxframes-ef)), maxframes-ef) if ((ef+ep)>maxframes) else (sp,ep)) for ((sp,ep),(sf,ef)) in zip(padframelist, cliplist)] # truncate to video boundary
return [vid.clone()
.activities([pa]+sa) # primary activity first
.tracks(t)
.clip(startframe=max(pa.startframe()-prepad, 0), endframe=min(pa.endframe()+postpad, (maxframes if maxframes is not None else pa.endframe()+postpad)))
.category(pa.category())
.setactorid(pa.actorid()) # actor is actor of primary activity
.setattribute('_instance_id', ('%s_%d' % (vid.videoid(), k)) if not vid.hasattribute('_instance_id') else vid.getattribute('_instance_id'))
for (k,(pa,sa,t,(prepad,postpad))) in enumerate(zip(primary_activities, secondary_activities, tracks, padframelist))
if (idx is None or k in tolist(idx))]
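A sketch of clipping with padding (values are illustrative):
```python
clips = v.activityclip(padframes=(30, 60))          # 30 frames before, 60 frames after each activity
fixed = v.activityclip(padto=64, multilabel=False)  # pad each clip to at least 64 frames long
```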
def noactivitylist(self, label=None, shortlabel=None):
"""Return a list of `vipy.activity.Activity` which are segments of each track with no associated activities.
Args:
label: [str] The activity label to give the background activities. Defaults to the track category (lowercase)
shortlabel: [str] The activity shortlabel to display for the background activities when visualized
Returns:
A list of `vipy.activity.Activity` such that each activity is associated with a track with temporal support where no activities are performed. The union of activitylist() and noactivitylist() should cover the temporal support of all tracks
"""
A = []
for t in self.tracklist():
(startframe, endframe) = (t.startframe(), t.startframe())
for k in range(t.startframe(), t.endframe()):
if not any([a.hastrack(t) and a.during(k) for a in self.activitylist()]) and k < (t.endframe()-1):
endframe = k # background
else:
if startframe < endframe:
A.append(vipy.activity.Activity(label=t.category() if label is None else label,
shortlabel='' if label is None else (label if shortlabel is None else shortlabel),
startframe=startframe,
endframe=endframe,
actorid=t.id(),
framerate=self.framerate(),
attributes={'_noactivitylist':True}))
(startframe, endframe) = (k+1,k+1)
return A
def noactivityclip(self, label=None, shortlabel=None, padframes=0):
"""Return a list of vipy.video.Scene() each clipped on a track segment that has no associated activities.
Args:
label: [str] The activity label to give the background activities. Defaults to the track category (lowercase)
shortlabel: [str] The activity shortlabel to give the background activities when visualized. Defaults to the track category (lowercase)
padframes: [int] The amount of temporal padding to apply to the clips before and after in frames. See `vipy.video.Scene.activityclip` for options.
Returns:
A list of `vipy.video.Scene` each cloned from the source video and clipped in the temporal region between activities. The union of activityclip() and noactivityclip() should equal the entire video.
.. notes::
- Each clip will contain exactly one activity "Background" which is the interval for this track where no activities are occurring
- Each clip will be at least one frame long
"""
A = self.clone().activities(self.noactivitylist(label=label, shortlabel=shortlabel)).activityclip(padframes=padframes, multilabel=False)
return [a.setattribute('_instance_id', '%s_bg' % a.getattribute('_instance_id')) for a in A]
def trackbox(self, dilate=1.0):
"""The trackbox is the union of all track bounding boxes in the video, or None if there are no tracks
Args:
dilate: [float] A dilation factor to apply to the trackbox before returning. See `vipy.geometry.BoundingBox.dilate`
Returns:
A `vipy.geometry.BoundingBox` which is the union of all boxes in the track (or None if no boxes exist)
"""
boxes = [t.clone().boundingbox() for t in self.tracklist()]
boxes = [bb.dilate(dilate) for bb in boxes if bb is not None]
return boxes[0].union(boxes[1:]) if len(boxes) > 0 else None
def framebox(self):
"""Return the bounding box for the image rectangle.
Returns:
A `vipy.geometry.BoundingBox` which defines the image rectangle
.. note:: This requires calling `vipy.video.Video.preview` to get the frame shape from the current filter chain, which touches the video file"""
return vipy.geometry.BoundingBox(xmin=0, ymin=0, width=self.width(), height=self.height())
def trackcrop(self, dilate=1.0, maxsquare=False, zeropad=True):
"""Return the trackcrop() of the scene which is the crop of the video using the `vipy.video.Scene.trackbox`.
Args:
zeropad: [bool] If True, the zero pad the crop if it is outside the image rectangle, otherwise return only valid pixels inside the image rectangle
maxsquare: [bool] If True, make the bounding box the maximum square before cropping
dilate: [float] The dilation factor to apply to the trackbox prior to cropping
Returns:
A `vipy.video.Scene` object from cropping the video using the trackbox. If there are no tracks, return None.
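Example (a sketch using the module's random test scene):
```python
import vipy.video
v = vipy.video.RandomScene()
vc = v.trackcrop(dilate=1.2, maxsquare=True)   # crop to the union of all track boxes; None if there are no tracks
```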
"""
bb = self.trackbox(dilate) # may be None if trackbox is degenerate
return self.crop(bb.maxsquareif(maxsquare), zeropad=zeropad) if bb is not None else None
def activitybox(self, activityid=None, dilate=1.0):
"""The activitybox is the union of all activity bounding boxes in the video, which is the union of all tracks contributing to all activities. This is most useful after activityclip().
The activitybox is the smallest bounding box that contains all of the boxes from all of the tracks in all activities in this video.
"""
activities = [a for (k,a) in self.activities().items() if (activityid is None or k in set(activityid))]
boxes = [t.clone().boundingbox().dilate(dilate) for t in self.tracklist() if any([a.hastrack(t) for a in activities])]
return boxes[0].union(boxes[1:]) if len(boxes) > 0 else vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(self.width()), height=int(self.height()))
def activitycuboid(self, activityid=None, dilate=1.0, maxdim=256, bb=None):
"""The activitycuboid() is the fixed square spatial crop corresponding to the activitybox (or supplied bounding box), which contains all of the valid activities in the scene. This is most useful after activityclip().
The activitycuboid() is a spatial crop of the video corresponding to the supplied boundingbox or the square activitybox().
The crop is resized so that the maximum dimension is maxdim, since the crop can be tiny and would otherwise not be encodable by ffmpeg
"""
bb = self.activitybox(activityid).maxsquare() if bb is None else bb
assert bb is None or isinstance(bb, vipy.geometry.BoundingBox)
assert bb.issquare(), "Add support for non-square boxes"
return self.clone().crop(bb.dilate(dilate).int(), zeropad=True).resize(maxdim, maxdim) # crop triggers preview()
def activitysquare(self, activityid=None, dilate=1.0, maxdim=256):
"""The activity square is the maxsquare activitybox that contains only valid (non-padded) pixels interior to the image"""
bb = self.activitybox(activityid).maxsquare().dilate(dilate).int().iminterior(self.width(), self.height()).minsquare()
return self.activitycuboid(activityid, dilate=1.0, maxdim=maxdim, bb=bb)
def activitytube(self, activityid=None, dilate=1.0, maxdim=256):
"""The activitytube() is a sequence of crops where the spatial box changes on every frame to track the activity.
The box in each frame is the square activitybox() for this video which is the union of boxes contributing to this activity in each frame.
This function does not perform any temporal clipping. Use activityclip() first to split into individual activities.
Crops will be optionally dilated, with zeropadding if the box is outside the image rectangle. All crops will be resized so that the maximum dimension is maxdim (and square by default)
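Example (a sketch using the module's random test scene):
```python
import vipy.video
v = vipy.video.RandomScene()
vt = v.activitytube(maxdim=256)   # per-frame square crops that follow the union of activity boxes
```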
"""
vid = self.clone().load() # triggers load
vid.activityfilter(lambda a: activityid is None or a.id() in set(activityid)) # only requested IDs (or all of them); filter the clone, not self
frames = [im.padcrop(im.boundingbox().maxsquare().dilate(dilate).int()).resize(maxdim, maxdim) for im in vid if im.boundingbox() is not None] # track interpolation, for frames with boxes only
if len(frames) != len(vid):
warnings.warn('[vipy.video.activitytube]: Removed %d frames with no spatial bounding boxes' % (len(vid) - len(frames)))
vid.attributes['__activitytube'] = {'truncated':len(vid) - len(frames)} # provenance to reject
if len(frames) == 0:
warnings.warn('[vipy.video.activitytube]: Resulting video is empty! Setting activitytube to zero')
frames = [ vid[0].resize(maxdim, maxdim).zeros() ] # empty frame
vid.attributes['__activitytube'] = {'empty':True} # provenance to reject
vid._tracks = {ti:vipy.object.Track(keyframes=[f for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti],
boxes=[d for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti],
category=t.category(), trackid=ti, framerate=self.framerate())
for (k,(ti,t)) in enumerate(self.tracks().items())} # replace tracks with boxes relative to tube
return vid.array(np.stack([im.numpy() for im in frames]))
def actortube(self, trackid=None, dilate=1.0, maxdim=256, strict=True):
"""The actortube() is a sequence of crops where the spatial box changes on every frame to track the primary actor performing an activity.
The box in each frame is the square box centered on the primary actor performing the activity, dilated by a given factor (the original box around the actor is unchanged, this just increases the context, with zero padding)
This function does not perform any temporal clipping. Use activityclip() first to split into individual activities.
All crops will be resized so that the maximum dimension is maxdim (and square by default)
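Example (a sketch using the module's random test scene):
```python
import vipy.video
v = vipy.video.RandomScene()
tid = list(v.tracks().keys())[0]                       # follow the first track
vt = v.actortube(trackid=tid, dilate=1.5, maxdim=256)  # per-frame square crops centered on this actor
```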
"""
assert trackid is not None or len(self.tracks()) == 1, "Track ID must be provided if there exists more than one track in the scene"
trackid = trackid if trackid is not None else list(self.tracks().keys())[0]
assert self.hastrack(trackid), "Track ID %s not found - Actortube requires a track ID in the scene (tracks=%s)" % (str(trackid), str(self.tracks()))
vid = self.clone().load() # triggers load
t = vid.tracks(id=trackid) # actor track
frames = [im.padcrop(t[k].maxsquare().dilate(dilate).int()).resize(maxdim, maxdim) for (k,im) in enumerate(vid) if t.during(k)] if len(t)>0 else [] # actor interpolation, padding may introduce frames with no tracks
if len(frames) == 0:
if not strict:
warnings.warn('[vipy.video.actortube]: Empty track for trackid="%s" - Setting actortube to zero' % trackid)
frames = [ vid[0].resize(maxdim, maxdim).zeros() ] # empty frame
vid.attributes['__actortube'] = {'empty':True} # provenance to reject
else:
raise ValueError('[vipy.video.actortube]: Empty track for track=%s, trackid=%s' % (str(t), trackid))
vid._tracks = {ti:vipy.object.Track(keyframes=[f for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], # keyframes zero indexed, relative to [frames]
boxes=[d for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], # one box per frame
category=t.category(), trackid=ti, framerate=self.framerate()) # preserve trackid
for (k,(ti,t)) in enumerate(self.tracks().items())} # replace tracks with interpolated boxes relative to tube defined by actor
return vid.array(np.stack([im.numpy() for im in frames]))
def speed(self, s):
"""Change the speed by a multiplier s. If s=1, this will be the same speed, s=0.5 for half-speed (slower playback), s=2 for double-speed (faster playback)"""
super().speed(s)
return self.trackmap(lambda t: t.framerate(speed=s)).activitymap(lambda a: a.framerate(speed=s))
def clip(self, startframe, endframe=None):
"""Clip the video to between (startframe, endframe). This clip is relative to clip() shown by __repr__().
Args:
startframe: [int] the start frame relative to the video framerate() for the clip
endframe: [int] the end frame relative to the video framerate for the clip, may be None
Returns:
This video object, clipped so that a load() will result in frame=0 equivalent to startframe. All tracks and activities updated relative to the new startframe.
.. note::
- This returns a clone of the video for idempotence
- This does not load the video. This updates the ffmpeg filter chain to temporally trim the video. See self.commandline() for the updated filter chain to run.
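Example (a sketch using the module's random test scene):
```python
import vipy.video
v = vipy.video.RandomScene()            # 30 fps random test scene
c = v.clip(startframe=0, endframe=30)   # first second; tracks and activities are shifted to the new frame origin
```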
"""
assert (endframe is None or startframe <= endframe) and startframe >= 0, "Invalid start and end frames (%s, %s)" % (str(startframe), str(endframe))
v = self.clone()
if not v.isloaded():
# -- Copied from super().clip() to allow for clip on clone (for idempotence)
# -- This code copy is used to avoid super(Scene, self.clone()) which screws up class inheritance for iPython reload
assert not v.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
timestamp_in_seconds = ((v._startframe if v._startframe is not None else 0)+startframe)/float(v.framerate())
v._update_ffmpeg_seek(timestamp_in_seconds)
if endframe is not None:
v._ffmpeg = v._ffmpeg.setpts('PTS-STARTPTS') # reset timestamp to 0 before trim filter
v._ffmpeg = v._ffmpeg.trim(start=0, end=(endframe-startframe)/self.framerate()) # must be in seconds to allow for framerate conversion
v._ffmpeg = v._ffmpeg.setpts('PTS-STARTPTS') # reset timestamp to 0 after trim filter
v._startframe = startframe if v._startframe is None else v._startframe + startframe # for __repr__ only
v._endframe = (v._startframe + (endframe-startframe)) if endframe is not None else v._endframe # for __repr__ only
# -- end copy
else:
endframe = endframe if endframe is not None else len(self._array)
v._array = self._array[startframe:endframe]
(v._startframe, v._endframe) = (0, endframe-startframe)
v._tracks = {k:t.offset(dt=-startframe).truncate(startframe=0, endframe=(endframe-startframe) if endframe is not None else None) for (k,t) in v.tracks().items()} # may be degenerate
v._activities = {k:a.offset(dt=-startframe).truncate(startframe=0, endframe=(endframe-startframe) if endframe is not None else None) for (k,a) in v.activities().items()} # may be degenerate
return v.trackfilter(lambda t: len(t)>0).activityfilter(lambda a: len(a)>0) # remove degenerate tracks and activities
def crop(self, bb, zeropad=True):
"""Crop the video using the supplied box, update tracks relative to crop, video is zeropadded if box is outside frame rectangle"""
assert isinstance(bb, vipy.geometry.BoundingBox), "Invalid input"
bb = bb.int()
bbc = bb.clone().imclipshape(self.width(), self.height()).int()
#if zeropad and bb != bbc:
# self.zeropad(bb.width()-bbc.width(), bb.height()-bbc.height())
# bb = bb.offset(bb.width()-bbc.width(), bb.height()-bbc.height())
super().crop(bb, zeropad=zeropad) # range check handled here to correctly apply zeropad
bb = bb if zeropad else bbc
self._tracks = {k:t.offset(dx=-bb.xmin(), dy=-bb.ymin()) for (k,t) in self.tracks().items()}
return self
def zeropad(self, padwidth, padheight):
"""Zero pad the video with padwidth columns before and after, and padheight rows before and after
Update tracks accordingly.
"""
assert isinstance(padwidth, int) and isinstance(padheight, int)
super().zeropad(padwidth, padheight)
self._tracks = {k:t.offset(dx=padwidth, dy=padheight) for (k,t) in self.tracks().items()}
return self
def fliplr(self):
(H,W) = self.shape() # yuck, need to get image dimensions before filter
self._tracks = {k:t.fliplr(H,W) for (k,t) in self.tracks().items()}
super().fliplr()
return self
def flipud(self):
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
(H,W) = self.shape() # yuck, need to get image dimensions before filter
self._tracks = {k:t.flipud(H,W) for (k,t) in self.tracks().items()}
super().flipud()
return self
def rot90ccw(self):
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
(H,W) = self.shape() # yuck, need to get image dimensions before filter
self._tracks = {k:t.rot90ccw(H,W) for (k,t) in self.tracks().items()}
super().rot90ccw()
return self
def rot90cw(self):
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
(H,W) = self.shape() # yuck, need to get image dimensions before filter
self._tracks = {k:t.rot90cw(H,W) for (k,t) in self.tracks().items()}
super().rot90cw()
return self
def resize(self, rows=None, cols=None, width=None, height=None):
"""Resize the video to (rows, cols), preserving the aspect ratio if only rows or cols is provided"""
assert not (rows is not None and height is not None) # cannot be both
assert not (cols is not None and width is not None) # cannot be both
rows = rows if rows is not None else height
cols = cols if cols is not None else width
assert rows is not None or cols is not None, "Invalid input"
(H,W) = self.shape() # yuck, need to get image dimensions before filter, manually set this prior to calling resize if known
sy = rows / float(H) if rows is not None else cols / float(W)
sx = cols / float(W) if cols is not None else rows / float(H)
self._tracks = {k:t.scalex(sx) for (k,t) in self.tracks().items()}
self._tracks = {k:t.scaley(sy) for (k,t) in self.tracks().items()}
super().resize(rows=rows, cols=cols)
return self
def mindim(self, dim=None):
"""Resize the video so that the minimum of (width,height)=dim, preserving aspect ratio"""
(H,W) = self.shape() # yuck, need to get image dimensions before filter
return min(self.shape()) if dim is None else (self if min(H,W) == dim else (self.resize(cols=dim) if W<H else self.resize(rows=dim)))
def maxdim(self, dim=None):
"""Resize the video so that the maximum of (width,height)=dim, preserving aspect ratio"""
assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
(H,W) = self.shape() # yuck, need to get image dimensions before filter
return max(H,W) if dim is None else (self.resize(cols=dim) if W>H else self.resize(rows=dim))
def rescale(self, s):
"""Spatially rescale the scene by a constant scale factor.
Args:
s: [float] Scale factor > 0 to isotropically scale the image.
"""
assert s == 1 or not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
self._tracks = {k:t.rescale(s) for (k,t) in self.tracks().items()}
super().rescale(s)
return self
def startframe(self):
return self._startframe
def extrapolate(self, f, dt=None):
"""Extrapolate the video to frame f and add the extrapolated tracks to the video"""
return self.trackmap(lambda t: t.add(f, t.linear_extrapolation(f, dt=dt if dt is not None else self.framerate()), strict=False))
def dedupe(self, spatial_iou_threshold=0.8, dt=5, tracks=True, activities=True, temporal_iou_threshold=0.8, verbose=True):
"""Find and delete duplicate tracks and activities by overlap.
Track deduplication algorithm
- For each pair of tracks with the same category, find the largest temporal segment that contains both tracks.
- For this segment, compute the IOU for each box interpolated at a stride of dt frames
- Compute the mean IOU for this segment. This is the segment IOU.
- If the segment IOU is greater than the threshold, merge the shorter of the two tracks with the current track.
Activity deduplication algorithm
- For each pair of activities in insertion order
- If the temporal IOU is greater than the threshold, then merge the older activity (later insertion) with the newer activity (earlier insertion)
- Update the actor ID of the merged activity to be that of the newer activity
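Example (a sketch using the module's random test scene):
```python
import vipy.video
v = vipy.video.RandomScene()
v = v.dedupe(spatial_iou_threshold=0.8, temporal_iou_threshold=0.8)   # merge duplicate tracks and activities in place
```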
"""
if tracks:
deleted = set([])
for tj in sorted(self.tracklist(), key=lambda t: len(t), reverse=True): # longest to shortest
for (s, ti) in sorted([(0,t) if (len(tj) < len(t) or t.id() in deleted or t.id() == tj.id() or t.category() != tj.category()) else (tj.fragmentiou(t, dt=dt), t) for t in self.tracklist()], key=lambda x: x[0], reverse=True):
if s > spatial_iou_threshold: # best mean framewise overlap during overlapping segment of two tracks (ti, tj)
if verbose:
print('[vipy.video.dedupe]: merging duplicate track "%s" (id=%s) which overlaps with "%s" (id=%s)' % (ti, ti.id(), tj, tj.id()))
self.tracks()[tj.id()] = tj.union(ti) # merge
self.activitymap(lambda a: a.replace(ti, tj)) # replace merged track reference in activity
deleted.add(ti.id())
self.trackfilter(lambda t: t.id() not in deleted) # remove duplicate tracks
if activities:
deleted = set([])
for (j,aj) in enumerate(self.activitylist()): # preserve insertion order
for ai in self.activitylist()[j+1:]:
if aj.hasoverlap(ai, threshold=temporal_iou_threshold) and ai.id() not in deleted:
if verbose:
print('[vipy.video.dedupe]: merging duplicate activity "%s" (id=%s) which overlaps with "%s" (id=%s)' % (ai, ai.id(), aj, aj.id()))
self.activities()[aj.id()] = aj.union(ai.clone().replaceid(ai.actorid(), aj.actorid())).addid(ai.actorid()) # merge two activities into one, with two tracks
deleted.add(ai.id())
self.activityfilter(lambda a: a.id() not in deleted) # remove duplicate activities
return self
def combine(self, other, tracks=True, activities=True, rekey=True):
"""Combine the activities and tracks from both scenes into self.
.. note:: This does not perform a union, it simply combines dictionaries. For deduplication, see `vipy.video.Scene.union`
"""
assert isinstance(other, Scene), "Invalid input - must be vipy.video.Scene() object and not type=%s" % str(type(other))
assert self.framerate() == other.framerate()
o = other.clone(rekey=True) if rekey else other # make sure keys are unique
if activities:
self.activities().update(o.activities())
if tracks:
self.tracks().update(o.tracks())
return self
def union(self, other, temporal_iou_threshold=0.5, spatial_iou_threshold=0.6, strict=True, overlap='average', percentilecover=0.8, percentilesamples=100, activity=True, track=True):
"""Compute the union two scenes as the set of unique activities and tracks.
A pair of activities or tracks are non-unique if they overlap spatially and temporally by a given IoU threshold. Merge overlapping tracks.
Tracks are merged if the mean IoU over the overlapping segment of two tracks with the same category is greater than the provided spatial_iou_threshold
Activities are merged if the temporal IoU of two activities of the same class is greater than the provided temporal_iou_threshold
Args:
other: Scene or list of Scenes for union. Other may be a clip of self at a different framerate, spatial isotropic scale, or clip offset
spatial_iou_threshold: The intersection over union threshold for the mean of the two segments of an overlapping track, Disable by setting to 1.0
temporal_iou_threshold: The intersection over union threshold for a temporal bounding box for a pair of activities to be declared duplicates. Disable by setting to 1.0
strict: Require both scenes to share the same underlying video filename
overlap=['average', 'replace', 'keep']
- average: Merge two tracks by averaging the boxes (average=True) if overlapping
- replace: merge two tracks by replacing overlapping boxes with other (discard self)
- keep: merge two tracks by keeping overlapping boxes with other (discard other)
percentilecover: [0,1]: When determining the assignment of two tracks, compute the percentilecover of two tracks by ranking the cover in the overlapping segment and computing the mean of the top-k assignments, where k=len(segment)*percentilecover.
percentilesamples: [>1]: the number of samples along the overlapping segment for computing percentile cover
activity: [bool]: If true, include activities in the union
track: [bool]: If true, include tracks in the union
Returns:
Updates this scene to include the non-overlapping activities from other. By default, it takes the strict union of all activities and tracks.
.. note::
- This is useful for merging scenes computed using a lower resolution/framerate/clipped object or activity detector without running the detector on the high-res scene
- This function will preserve the invariant v == v.clear().union(v.rescale(0.5).framerate(5).activityclip()), to within the quantization error of framerate() downsampling.
- percentilecover is a robust method of track assignment when boxes for two tracks (e.g. ground truth and detections) may deform due to occlusion.
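Example (a sketch; unioning a scene with a clone of itself should merge the duplicate annotations rather than repeat them):
```python
import vipy.video
v = vipy.video.RandomScene()
u = v.clone().union(v.clone())   # duplicates are merged; track and activity counts should be unchanged
```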
"""
assert overlap in ['average', 'replace', 'keep'], "Invalid input - 'overlap' must be in [average, replace, keep]"
assert spatial_iou_threshold >= 0 and spatial_iou_threshold <= 1, "invalid spatial_iou_threshold, must be between [0,1]"
assert temporal_iou_threshold >= 0 and temporal_iou_threshold <= 1, "invalid temporal_iou_threshold, must be between [0,1]"
assert percentilesamples >= 1, "invalid samples, must be >= 1"
if not activity and not track:
return self # nothing to do
sc = self.clone() # do not change self yet, make a copy then merge at the end
for o in tolist(other):
assert isinstance(o, Scene), "Invalid input - must be vipy.video.Scene() object and not type=%s" % str(type(o))
if strict:
assert sc.filename() == o.filename(), "Invalid input - Scenes must have the same underlying video. Disable this with strict=False."
oc = o.clone() # do not change other, make a copy
# Key collision?
if len(set(sc.tracks().keys()).intersection(set(oc.tracks().keys()))) > 0:
print('[vipy.video.union]: track key collision - Rekeying other... Use other.rekey() to suppress this warning.')
oc.rekey()
if len(set(sc.activities().keys()).intersection(set(oc.activities().keys()))) > 0:
print('[vipy.video.union]: activity key collision - Rekeying other... Use other.rekey() to suppress this warning.')
oc.rekey()
# Similarity transform? Other may differ from self by a temporal scale (framerate), temporal translation (clip) or spatial isotropic scale (rescale)
assert np.isclose(sc.aspect_ratio(), oc.aspect_ratio(), atol=1E-2), "Invalid input - Scenes must have the same aspect ratio"
if sc.width() != oc.width():
oc = oc.rescale(sc.width() / oc.width()) # match spatial scale
if not np.isclose(sc.framerate(), oc.framerate(), atol=1E-3):
oc = oc.framerate(sc.framerate()) # match temporal scale (video in oc will not match, only annotations)
if sc.startframe() != oc.startframe():
dt = (oc.startframe() if oc.startframe() is not None else 0) - (sc.startframe() if sc.startframe() is not None else 0)
oc = oc.trackmap(lambda t: t.offset(dt=dt)).activitymap(lambda a: a.offset(dt=dt)) # match temporal translation of tracks and activities
oc = oc.trackfilter(lambda t: ((not t.isdegenerate()) and len(t)>0), activitytrack=False)
# Merge other tracks into selfclone: one-to-many mapping from self to other
merged = {} # dictionary mapping trackid in other to the trackid in self, each track in other can be merged at most once
for ti in sorted(sc.tracklist(), key=lambda t: len(t), reverse=True): # longest to shortest
for tj in sorted(oc.tracklist(), key=lambda t: len(t), reverse=True):
if ti.category() == tj.category() and (tj.id() not in merged) and tj.segment_percentilecover(sc.track(ti.id()), percentile=percentilecover, samples=percentilesamples) > spatial_iou_threshold: # mean framewise overlap during overlapping segment of two tracks
sc.tracks()[ti.id()] = sc.track(ti.id()).union(tj, overlap=overlap) # merge duplicate/fragmented tracks from other into self, union() returns clone
merged[tj.id()] = ti.id()
print('[vipy.video.union]: merging track "%s"(id=%s) + "%s"(id=%s) for scene "%s"' % (str(ti), str(ti.id()), str(tj), str(tj.id()), str(sc)))
oc.trackfilter(lambda t: t.id() not in merged, activitytrack=False) # remove duplicate other track for final union
# Merge other activities into selfclone: one-to-one mapping
for (i,j) in merged.items(): # i=id of other, j=id of self
oc.activitymap(lambda a: a.replaceid(i, j) if a.hastrack(i) else a) # update track IDs referenced in activities for merged tracks
for (i,ai) in sc.activities().items():
for (j,aj) in oc.activities().items():
if ai.category() == aj.category() and set(ai.trackids()) == set(aj.trackids()) and ai.temporal_iou(aj) > temporal_iou_threshold:
oc.activityfilter(lambda a: a.id() != j) # remove duplicate activity from final union
oc.activityfilter(lambda a: len(a.tracks())>0) # remove empty activities not merged
# Union
sc.tracks().update(oc.tracks())
sc.activities().update(oc.activities())
# Final union of unique tracks/activities
if track:
self.tracks(sc.tracklist()) # union of tracks only
if activity:
self.activities(sc.activitylist()) # union of activities only: may reference tracks not in self if track=False
return self
def annotate(self, outfile=None, fontsize=10, captionoffset=(0,0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, categories=None, nocaption=False, nocaption_withstring=[], mutator=None, timestamp=None, timestampcolor='black', timestampfacecolor='white', verbose=False):
"""Generate a video visualization of all annotated objects and activities in the video.
The annotation video will be at the resolution and framerate of the underlying video, and pixels in this video will now contain the overlay.
This function does not play the video, it only generates the annotated video frames. Use show(), which is equivalent to annotate().saveas().play()
Args:
outfile: [str] An optional file to stream the annotation to without storing the annotated video in memory
fontsize: [int] The fontsize of bounding box captions, used by matplotlib
captionoffset: (tuple) The (x,y) offset relative to the bounding box to place the caption for each box.
textfacecolor: [str] The color of the text in the bounding box caption. Must be in `vipy.gui.using_matplotlib.colorlist`.
textfacealpha: [float] The transparency of the text in the bounding box caption. Must be in [0,1], where 0=transparent and 1=opaque.
shortlabel: [bool] If true, display the shortlabel for each object in the scene, otherwise show the full category
boxalpha: [float] The transparency of the box face behind the text. Must be in [0,1], where 0=transparent and 1=opaque.
d_category2color: [dict] A dictionary mapping categories of objects in the scene to their box colors. Named colors must be in `vipy.gui.using_matplotlib.colorlist`.
categories: [list] Only show these categories, or show them all if None
nocaption_withstring: [list]: Do not show captions for those detection categories (or shortlabels) containing any of the strings in the provided list
nocaption: [bool] If true, do not show any captions, just boxes
mutator: [lambda] A lambda function that will mutate an image to allow for complex visualizations. This should be a mutator like `vipy.image.mutator_show_trackid`.
timestamp: [bool] If true, show a semitransparent timestamp (when the annotation occurs, not when the video was collected) with frame number in the upper left corner of the video
timestampcolor: [str] The color of the timestamp text. Named colors must be in `vipy.gui.using_matplotlib.colorlist`.
timestampfacecolor: [str] The color of the timestamp background. Named colors must be in `vipy.gui.using_matplotlib.colorlist`.
verbose: [bool] Show more helpful messages if true
Returns:
A `vipy.video.Video` with annotations in the pixels. If outfile is provided, then the returned video will be flushed.
.. note:: In general, this function should not be run on very long videos without the outfile kwarg, as it requires loading the video framewise into memory.
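Example (a sketch; the output path is hypothetical):
```python
import vipy.video
v = vipy.video.RandomScene()
a = v.annotate()                                # in-memory vipy.video.Video with the overlay burned into the pixels
# a = v.annotate(outfile='/tmp/annotated.mp4')  # hypothetical path; streams to disk without loading all frames
```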
"""
assert outfile is None or vipy.util.isvideofile(outfile), "Invalid filename extension for annotated video"
if verbose:
print('[vipy.video.annotate]: Annotating video ...')
f_mutator = mutator if mutator is not None else vipy.image.mutator_show_jointlabel()
f_timestamp = (lambda k: '%s %d' % (vipy.util.clockstamp(), k)) if timestamp is True else timestamp
if outfile is None:
assert self.load().isloaded(), "Load() failed"
imgs = [f_mutator(self[k].clone(), k).savefig(fontsize=fontsize,
captionoffset=captionoffset,
textfacecolor=textfacecolor,
textfacealpha=textfacealpha,
shortlabel=shortlabel,
boxalpha=boxalpha,
d_category2color=d_category2color,
categories=categories,
nocaption=nocaption,
timestampcolor=timestampcolor,
timestampfacecolor=timestampfacecolor,
timestamp=f_timestamp(k) if timestamp is not None else None,
figure=1 if k<(len(self)-1) else None, # cleanup on last frame
nocaption_withstring=nocaption_withstring).numpy() for k in range(0, len(self))]
# Replace pixels with annotated pixels and downcast object to vipy.video.Video (since there are no more objects to show)
return vipy.video.Video(array=np.stack([np.array(PIL.Image.fromarray(img).convert('RGB')) for img in imgs], axis=0), framerate=self.framerate(), attributes=self.attributes) # slow for large videos
else:
# Stream to output video without loading all frames into memory
n = self.duration_in_frames_of_videofile() if not self.isloaded() else len(self)
vo = vipy.video.Video(filename=outfile, framerate=self.framerate())
with vo.stream(overwrite=True) as so:
for (k,im) in enumerate(self.stream()):
so.write(f_mutator(im.clone(), k).savefig(fontsize=fontsize,
captionoffset=captionoffset,
textfacecolor=textfacecolor,
textfacealpha=textfacealpha,
shortlabel=shortlabel,
boxalpha=boxalpha,
d_category2color=d_category2color,
categories=categories,
nocaption=nocaption,
timestampcolor=timestampcolor,
timestampfacecolor=timestampfacecolor,
timestamp=f_timestamp(k) if timestamp is not None else None,
figure=1 if k<(n-1) else None, # cleanup on last frame
nocaption_withstring=nocaption_withstring).rgb())
return vo
def _show(self, outfile=None, verbose=True, fontsize=10, captionoffset=(0,0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, categories=None, nocaption=False, nocaption_withstring=[], notebook=False, timestamp=None, timestampcolor='black', timestampfacecolor='white'):
"""Generate an annotation video saved to outfile (or tempfile if outfile=None) and show it using ffplay when it is done exporting. Do not modify the original video buffer. Returns a clone of the video with the shown annotation."""
return self.clone().annotate(verbose=verbose,
fontsize=fontsize,
captionoffset=captionoffset,
textfacecolor=textfacecolor,
textfacealpha=textfacealpha,
shortlabel=shortlabel,
boxalpha=boxalpha,
d_category2color=d_category2color,
categories=categories,
nocaption=nocaption,
timestampcolor=timestampcolor,
timestampfacecolor=timestampfacecolor,
timestamp=timestamp,
nocaption_withstring=nocaption_withstring).saveas(outfile).play(notebook=notebook)
def show(self, outfile=None, verbose=True, fontsize=10, captionoffset=(0,0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, categories=None, nocaption=False, nocaption_withstring=[], figure=1, fps=None, timestamp=None, timestampcolor='black', timestampfacecolor='white', mutator=None):
"""Faster show using interative image show for annotated videos. This can visualize videos before video rendering is complete, but it cannot guarantee frame rates. Large videos with complex scenes will slow this down and will render at lower frame rates."""
fps = min(fps, self.framerate()) if fps is not None else self.framerate()
assert fps > 0, "Invalid display framerate"
f_timestamp = (lambda k: '%s %d' % (vipy.util.clockstamp(), k)) if timestamp is True else timestamp
f_mutator = mutator if mutator is not None else vipy.image.mutator_show_jointlabel()
if not self.isdownloaded() and self.hasurl():
self.download()
with Stopwatch() as sw:
for (k,im) in enumerate(self.load() if self.isloaded() else self.stream()):
time.sleep(max(0, (1.0/self.framerate())*int(np.ceil((self.framerate()/fps)))))
f_mutator(im,k).show(categories=categories,
figure=figure,
nocaption=nocaption,
nocaption_withstring=nocaption_withstring,
fontsize=fontsize,
boxalpha=boxalpha,
d_category2color=d_category2color,
captionoffset=captionoffset,
textfacecolor=textfacecolor,
textfacealpha=textfacealpha,
timestampcolor=timestampcolor,
timestampfacecolor=timestampfacecolor,
timestamp=f_timestamp(k) if timestamp is not None else None,
shortlabel=shortlabel)
if vipy.globals._user_hit_escape():
break
vipy.show.close(figure)
return self
def thumbnail(self, outfile=None, frame=0, fontsize=10, nocaption=False, boxalpha=0.25, dpi=200, textfacecolor='white', textfacealpha=1.0):
"""Return annotated frame=k of video, save annotation visualization to provided outfile if provided, otherwise return vipy.image.Scene"""
im = self.frame(frame, img=self.preview(framenum=frame).array())
return im.savefig(outfile=outfile, fontsize=fontsize, nocaption=nocaption, boxalpha=boxalpha, dpi=dpi, textfacecolor=textfacecolor, textfacealpha=textfacealpha) if outfile is not None else im
def stabilize(self, padheightfrac=0.125, padwidthfrac=0.25, padheightpx=None, padwidthpx=None, gpu=None, outfile=None):
"""Background stablization using flow based stabilization masking foreground region.
- This will output a video with all frames aligned to the first frame, such that the background is static.
- This uses the flow based approach described in `vipy.flow.Flow.stabilize`
Args:
padheightfrac: [float] The height padding (relative to video height) to be applied to output video to allow for vertical stabilization
padwidthfrac: [float] The width padding (relative to video width) to be applied to output video to allow for horizontal stabilization
padheightpx: [int] The height padding to be applied to output video to allow for vertical stabilization. Overrides padheight.
padwidthpx: [int] The width padding to be applied to output video to allow for horizontal stabilization. Overrides padwidth.
gpu: [int] The GPU index to use, if opencv has been compiled with GPU support (this is rare)
outfile: [str] The output filename to store the stabilized video
Returns:
A clone of this video with background pixels stabilized to the first frame.
.. note::
- If the camera pans outside the image rectangle, increase the padheight or padwidth to make sure that the actor stays inside the stabilized image rectangle
- If there are moving actors in the scene, include bounding boxes for each, and these boxes are ignored as keepouts in the flow stabilization
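Example (a sketch; the filenames are hypothetical, and `vipy.flow` requires opencv):
```python
import vipy.video
v = vipy.video.Scene(filename='/path/to/video.mp4')   # hypothetical input video with actor boxes
vs = v.stabilize(outfile='/path/to/stabilized.mp4')   # background aligned to the first frame
```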
"""
from vipy.flow import Flow # requires opencv
return Flow(flowdim=256, gpu=gpu).stabilize(self.clone(), residual=True, strict=True, padheightfrac=padheightfrac, padwidthfrac=padwidthfrac, padheightpx=padheightpx, padwidthpx=padwidthpx, outfile=outfile)
def pixelmask(self, pixelsize=8):
"""Replace all pixels in foreground boxes with pixelation (e.g. bigger pixels, like privacy glass)"""
for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy
im.pixelmask(pixelsize) # shared numpy array
return self
def pixelize(self, radius=16):
"""Alias for pixelmask()"""
return self.pixelmask(pixelsize=radius)
def pixelate(self, radius=16):
"""Alias for pixelmask()"""
return self.pixelmask(pixelsize=radius)
def binarymask(self):
"""Replace all pixels in foreground boxes with white, zero in background"""
for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy
im.binarymask() # shared numpy array
return self
def asfloatmask(self, fg=1.0, bg=0.0):
"""Replace all pixels in foreground boxes with fg, and bg in background, return a copy"""
assert self.isloaded()
self.numpy() # convert to writeable numpy array, triggers writeable copy
array = np.full( (len(self.load()), self.height(), self.width(), 1), dtype=np.float32, fill_value=bg)
for (k,im) in enumerate(self):
for bb in im.objects():
if bb.hasintersection(im.imagebox()):
array[k, int(round(bb._ymin)):int(round(bb._ymax)), int(round(bb._xmin)):int(round(bb._xmax))] = fg # does not need imclip
return vipy.video.Video(array=array, framerate=self.framerate(), colorspace='float')
def meanmask(self):
"""Replace all pixels in foreground boxes with mean color"""
for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy
im.meanmask() # shared numpy array
return self
def fgmask(self):
"""Replace all pixels in foreground boxes with zero"""
for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy
im.fgmask() # shared numpy array
return self
def zeromask(self):
"""Alias for fgmask"""
return self.fgmask()
def blurmask(self, radius=7):
"""Replace all pixels in foreground boxes with gaussian blurred foreground"""
for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy
im.blurmask(radius) # shared numpy array
return self
def downcast(self):
"""Cast the object to a `vipy.video.Video` class"""
self.__class__ = vipy.video.Video
return self
def merge_tracks(self, dilate_height=2.0, dilate_width=2.0, framedist=5):
"""Merge tracks if a track endpoint dilated by a fraction overlaps exactly one track startpoint, and the endpoint and startpoint are close enough together temporally.
.. note::
- This is useful for continuing tracking when the detection framerate was too low and the assignment falls outside the measurement gate.
- This will not work for complex scenes, as it assumes that there is exactly one possible continuation for a track.
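Example (a sketch using the module's random test scene):
```python
import vipy.video
v = vipy.video.RandomScene()
v = v.merge_tracks(dilate_height=2.0, dilate_width=2.0, framedist=5)   # join fragmented tracks in place
```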
"""
merged = set([])
for ti in sorted(self.tracklist(), key=lambda t: t.startframe()):
for tj in sorted(self.tracklist(), key=lambda t: t.startframe()):
if (tj.id() not in merged) and (ti.id() != tj.id()) and (tj.startframe() >= ti.endframe()) and ((tj.startframe()-ti.endframe()) <= framedist) and (ti.category() == tj.category()):
di = ti[ti.endframe()].dilate_height(dilate_height).dilate_width(dilate_width)
dj = tj[tj.startframe()]
if di.iou(dj) > 0 and not any([di.iou(tk[tj.startframe()]) > 0 for tk in self.tracklist() if (tk.id() not in [ti.id(), tj.id()]) and tk.during(tj.startframe())]):
self.tracks()[ti.id()] = ti.union(tj) # Merge tracks that are within gating distance
self.delete(tj.id()) # remove merged track
merged.add(tj.id())
break
return self
def assign(self, frame, dets, minconf=0.2, maxhistory=5, activityiou=0.5, trackcover=0.2, trackconfsamples=4, gate=0, activitymerge=True, activitynms=False):
"""Assign a list of `vipy.object.Detection` object detections and `vipy.activity.Activity` activity detections at frame k to scene tracks and activities by greedy assignment. In-place update.
Approach:
- This approach is equivalent to greedy, constant velocity SORT tracking (https://arxiv.org/abs/1602.00763)
- Individual detections are assigned to tracks using a greedy velocity only track propagation, sorted by `vipy.geometry.BoundingBox.maxcover` and detection confidence within a spatial tracking gate
- New tracks are created if the detection is unassigned and above a minimum confidence
- Updated tracks resulting from assignment are stored in `vipy.video.Scene.tracks`
Args:
frame: [int] The frame index to assign the detections into the scene
dets: [list] A list of `vipy.object.Detection` or `vipy.activity.Activity` objects as returned from a detector
activityiou: [float] the minimum temporal IOU for activity assignment
minconf: [float] the minimum confidence for a detection to be considered as a new track
maxhistory: [int] the maximum propagation length of a track with no measurements, the frame history used for velocity estimates
trackconfsamples: [int] the number of uniformly spaced samples along a track to compute a mean track confidence
gate: [int] the gating distance in pixels used for assignment of fast moving detections. Useful for low detection framerates if a detection does not overlap with the track.
trackcover: [float] the minimum cover necessary for assignment of a detection to a track
activitymerge: [bool] if true, then merge overlapping activity detections of the same track and category, otherwise each activity detection is added as a new detection
activitynms: [bool] if true, then perform non-maximum suppression of activity detections of the same actor and category that overlap more than activityiou
Returns:
This video object with each det assigned to corresponding track or activity.
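Example (a sketch; the detection values are hypothetical, the category is chosen to match an existing track, and this assumes `vipy.object.Detection` accepts a confidence keyword):
```python
import vipy.video, vipy.object
v = vipy.video.RandomScene()
d = vipy.object.Detection(category='track0', xmin=0, ymin=0, width=32, height=32, confidence=0.9)  # hypothetical detector output
v = v.assign(frame=0, dets=[d], minconf=0.2)   # greedy assignment of the detection to a track at frame 0
```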
"""
assert dets is None or all([isinstance(d, vipy.object.Detection) or isinstance(d, vipy.activity.Activity) for d in tolist(dets)]), "invalid input"
assert frame >= 0 and minconf >= 0 and minconf <= 1.0 and maxhistory > 0, "invalid input"
if dets is None or len(tolist(dets)) == 0:
return self
dets = tolist(dets)
if any([d.confidence() is None for d in dets]):
warnings.warn('Removing %d detections with no confidence' % len([d for d in dets if d.confidence() is None]))
dets = [d for d in dets if d.confidence() is not None]
objdets = [d for d in dets if isinstance(d, vipy.object.Detection)]
activitydets = [d for d in dets if isinstance(d, vipy.activity.Activity)]
# Object detection to track assignment
if len(objdets) > 0:
# Track propagation: Constant velocity motion model for active tracks
t_ref = [(t, t.linear_extrapolation(frame, dt=maxhistory, shape=False)) for (k,t) in self.tracks().items() if ((frame - t.endframe()) <= maxhistory)]
trackarea = [ti.area() for (t,ti) in t_ref]
detarea = [d.area() for d in objdets]
# Track assignment:
# - Each track is assigned at most one detection
# - Each detection is assigned to at most one track.
# - Assignment is the highest confidence maximum overlapping detection by cover within tracking gate
trackconf = {t.id():t.confidence(samples=trackconfsamples) for (t, ti) in t_ref}
assignments = [(t, d.confidence(), d.iou(ti, area=detarea[j], otherarea=trackarea[i]), d.shapeiou(ti, area=detarea[j], otherarea=trackarea[i]), d.maxcover(ti, area=detarea[j], otherarea=trackarea[i]), d)
for (i, (t, ti)) in enumerate(t_ref)
for (j,d) in enumerate(objdets)
if (t.category() == d.category() and
(((ti._xmax if ti._xmax < d._xmax else d._xmax) - (ti._xmin if ti._xmin > d._xmin else d._xmin)) > 0 and
((ti._ymax if ti._ymax < d._ymax else d._ymax) - (ti._ymin if ti._ymin > d._ymin else d._ymin)) > 0))]
assigned = set([])
posconf = min([d.confidence() for d in objdets]) if len(objdets)>0 else 0
assignments.sort(key=lambda x: (x[1]+posconf)*(x[2]+x[3]+x[4])+trackconf[x[0].id()], reverse=True) # in-place
for (t, conf, iou, shapeiou, cover, d) in assignments:
if cover > (trackcover if len(t)>1 else 0): # the highest confidence detection within the assignment gate (or any overlap if not yet enough history for velocity estimate)
if (t.id() not in assigned and d.id() not in assigned): # not assigned yet, assign it!
self.track(t.id()).update(frame, d.clone()) # track assignment! (clone required)
assigned.add(t.id()) # cannot assign again to this track
assigned.add(d.id()) # mark detection as assigned
# Track spawn from unassigned and unexplained detections
for (j,d) in enumerate(objdets):
if (d.id() not in assigned):
if (d.confidence() >= minconf and not any([t.linear_extrapolation(frame, dt=maxhistory, shape=False).maxcover(d, otherarea=detarea[j]) >= 0.7 for (i,(t,ti)) in enumerate(t_ref) if t.category() == d.category()])):
gated = [(t, t.linear_extrapolation(frame, dt=maxhistory, shape=False)) for (t,ti) in t_ref if (t.id() not in assigned and t.category() == d.category())] if gate>0 else []
gated = sorted([(t, ti) for (t, ti) in gated if ti.hasintersection(d, gate=gate)], key=lambda x: d.sqdist(x[1]))
if len(gated) > 0:
self.track(gated[0][0].id()).update(frame, d.clone()) # track assignment! (clone required)
assigned.add(gated[0][0].id())
assigned.add(d.id())
else:
assigned.add(self.add(vipy.object.Track(keyframes=[frame], boxes=[d.clone()], category=d.category(), framerate=self.framerate()), rangecheck=False)) # clone required
assigned.add(d.id())
# Activity assignment
if len(activitydets) > 0:
assert all([d.actorid() in self.tracks() for d in activitydets]), "Invalid activity"
assigned = set([])
if activitymerge:
minframe = min([a._startframe for a in activitydets])
activities = [a for a in self.activities().values() if a._endframe >= minframe]
for d in activitydets:
for a in activities:
if (a._label == d._label) and (a._actorid == d._actorid) and a.hasoverlap(d, activityiou):
a.union(d) # activity assignment
assigned.add(d._id)
break # assigned, early exit
if activitynms:
minframe = min([a._startframe for a in activitydets])
activities = sorted([a for a in self.activities().values() if a._endframe >= minframe], key=lambda a: a.confidence(), reverse=True)
for d in sorted(activitydets, key=lambda x: x.confidence(), reverse=True):
for a in activities:
if (a._label == d._label) and (a._actorid == d._actorid) and a.hasoverlap(d, activityiou):
assigned.add(a._id if d.confidence()>a.confidence() else d._id) # suppressed
for id in assigned:
if id in self._activities:
del self._activities[id] # suppression, faster than self.activityfilter(lambda a: a.id() in assigned)
# Activity construction from unassigned detections
for d in activitydets:
if d._id not in assigned:
self.add(d.clone())
return self
def RandomVideo(rows=None, cols=None, frames=None):
"""Return a random loaded vipy.video.video.
Useful for unit testing, minimum size (32x32x32) for ffmpeg
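Usage (a quick sketch for unit tests):
```python
v = RandomVideo(rows=64, cols=64, frames=32)   # 64x64x32 random RGB video, loaded in memory
```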
"""
rows = np.random.randint(256, 1024) if rows is None else rows
cols = np.random.randint(256, 1024) if cols is None else cols
frames = np.random.randint(32, 256) if frames is None else frames
assert rows>32 and cols>32 and frames>=32
return Video(array=np.uint8(255 * np.random.rand(frames, rows, cols, 3)), colorspace='rgb')
def RandomScene(rows=None, cols=None, frames=None):
"""Return a random loaded vipy.video.Scene.
Useful for unit testing.
"""
v = RandomVideo(rows, cols, frames)
(rows, cols) = v.shape()
tracks = [vipy.object.Track(label='track%d' % k, shortlabel='t%d' % k,
keyframes=[0, np.random.randint(50,100), 150],
framerate=30,
boxes=[vipy.object.Detection(xmin=np.random.randint(0,cols - 16), ymin=np.random.randint(0,rows - 16),
width=np.random.randint(16,cols//2), height=np.random.randint(16,rows//2)),
vipy.object.Detection(xmin=np.random.randint(0,cols - 16), ymin=np.random.randint(0,rows - 16),
width=np.random.randint(16,cols//2), height=np.random.randint(16,rows//2)),
vipy.object.Detection(xmin=np.random.randint(0,cols - 16), ymin=np.random.randint(0,rows - 16),
width=np.random.randint(16,cols//2), height=np.random.randint(16,rows//2))]) for k in range(0,32)]
activities = [vipy.activity.Activity(label='activity%d' % k, shortlabel='a%d' % k, tracks=[tracks[j].id() for j in [np.random.randint(32)]], startframe=np.random.randint(50,99), endframe=np.random.randint(100,150), framerate=30) for k in range(0,32)]
return Scene(array=v.array(), colorspace='rgb', category='scene', tracks=tracks, activities=activities, framerate=30)
def RandomSceneActivity(rows=None, cols=None, frames=256):
"""Return a random loaded vipy.video.Scene.
Useful for unit testing.
"""
v = RandomVideo(rows, cols, frames)
(rows, cols) = v.shape()
tracks = [vipy.object.Track(label=['Person','Vehicle','Object'][k], shortlabel='track%d' % k, boundary='strict',
keyframes=[0, np.random.randint(50,100), np.random.randint(50,150)],
framerate=30,
boxes=[vipy.object.Detection(xmin=np.random.randint(0,cols - 16), ymin=np.random.randint(0,rows - 16),
width=np.random.randint(16,cols//2), height=np.random.randint(16,rows//2)),
vipy.object.Detection(xmin=np.random.randint(0,cols - 16), ymin=np.random.randint(0,rows - 16),
width=np.random.randint(16,cols//2), height=np.random.randint(16,rows//2)),
vipy.object.Detection(xmin=np.random.randint(0,cols - 16), ymin=np.random.randint(0,rows - 16),
width=np.random.randint(16,cols//2), height=np.random.randint(16,rows//2))]) for k in range(0,3)]
activities = [vipy.activity.Activity(label='Person Carrying', shortlabel='Carry', tracks=[tracks[0].id(), tracks[1].id()], startframe=np.random.randint(20,50), endframe=np.random.randint(70,100), framerate=30)]
ims = Scene(array=v.array(), colorspace='rgb', category='scene', tracks=tracks, activities=activities, framerate=30)
return ims
def EmptyScene():
"""Return an empty scene"""
return vipy.video.Scene(array=np.zeros((1,1,1,3), dtype=np.uint8))
Functions
def EmptyScene()
-
Return an empty scene
def RandomScene(rows=None, cols=None, frames=None)
-
Return a random loaded vipy.video.Scene.
Useful for unit testing.
def RandomSceneActivity(rows=None, cols=None, frames=256)
-
Return a random loaded vipy.video.Scene.
Useful for unit testing.
def RandomVideo(rows=None, cols=None, frames=None)
-
Return a random loaded vipy.video.Video.
Useful for unit testing, minimum size (32x32x32) for ffmpeg
Classes
class Scene (filename=None, url=None, framerate=30.0, array=None, colorspace=None, category=None, tracks=None, activities=None, attributes=None, startframe=None, endframe=None, startsec=None, endsec=None)
-
vipy.video.Scene class
The vipy.video.Scene class provides a fluent, lazy interface for representing, transforming and visualizing annotated videos. The following constructors are supported:
vid = vipy.video.Scene(filename='/path/to/video.ext')
Valid video extensions are those that are supported by ffmpeg ['.avi','.mp4','.mov','.wmv','.mpg','.mkv','.webm'].
vid = vipy.video.Scene(url='https://www.youtube.com/watch?v=MrIN959JuV8') vid = vipy.video.Scene(url='http://path/to/video.ext', filename='/path/to/video.ext')
Youtube URLs are downloaded to a temporary filename, retrievable as vid.download().filename(). If the environment variable 'VIPY_CACHE' is defined, then videos are saved to this directory rather than the system temporary directory. If a filename is provided to the constructor, then that filename will be used instead of a temp or cached filename. URLs can be defined as an absolute URL to a video file, or to a site supported by 'youtube-dl' [https://ytdl-org.github.io/youtube-dl/supportedsites.html]
vid = vipy.video.Scene(array=frames, colorspace='rgb')
The input 'frames' is an NxHxWx3 numpy array corresponding to an N-length list of HxWx3 uint8 numpy arrays, each a single frame of pre-loaded video. Note that the video transformations (clip, resize, rescale, rotate) are only available prior to load(), and the array() is assumed immutable after load().
vid = vipy.video.Scene(array=greyframes, colorspace='lum')
The input 'greyframes' is an NxHxWx1 numpy array corresponding to an N-length list of HxWx1 uint8 numpy arrays, each a single frame of pre-loaded video. This corresponds to the luminance of an RGB colorspace.
vid = vipy.video.Scene(array=greyframes, colorspace='lum', tracks=tracks, activities=activities)
- tracks = [vipy.object.Track(), …]
- activities = [vipy.activity.Activity(), …]
The inputs are lists of tracks and/or activities. An object is a spatial bounding box with a category label. A track is a spatiotemporal bounding box with a category label, such that the box contains the same instance of an object. An activity is one or more tracks with a start and end frame for an activity performed by the object instances. Track and activity timing must be relative to the start frame of the Scene() constructor.
class Scene(VideoCategory): """ vipy.video.Scene class The vipy.video.Scene class provides a fluent, lazy interface for representing, transforming and visualizing annotated videos. The following constructors are supported: ```python vid = vipy.video.Scene(filename='/path/to/video.ext') ``` Valid video extensions are those that are supported by ffmpeg ['.avi','.mp4','.mov','.wmv','.mpg', 'mkv', 'webm']. ```python vid = vipy.video.Scene(url='https://www.youtube.com/watch?v=MrIN959JuV8') vid = vipy.video.Scene(url='http://path/to/video.ext', filename='/path/to/video.ext') ``` Youtube URLs are downloaded to a temporary filename, retrievable as vid.download().filename(). If the environment variable 'VIPY_CACHE' is defined, then videos are saved to this directory rather than the system temporary directory. If a filename is provided to the constructor, then that filename will be used instead of a temp or cached filename. URLs can be defined as an absolute URL to a video file, or to a site supported by 'youtube-dl' [https://ytdl-org.github.io/youtube-dl/supportedsites.html] ```python vid = vipy.video.Scene(array=frames, colorspace='rgb') ``` The input 'frames' is an NxHxWx3 numpy array corresponding to an N-length list of HxWx3 uint8 numpy array which is a single frame of pre-loaded video Note that the video transformations (clip, resize, rescale, rotate) are only available prior to load(), and the array() is assumed immutable after load(). ```python vid = vipy.video.Scene(array=greyframes, colorspace='lum') ``` The input 'greyframes' is an NxHxWx1 numpy array corresponding to an N-length list of HxWx3 uint8 numpy array which is a single frame of pre-loaded video This corresponds to the luminance of an RGB colorspace ```python vid = vipy.video.Scene(array=greyframes, colorspace='lum', tracks=tracks, activities=activities) ``` - tracks = [vipy.object.Track(), ...] - activities = [vipy.object.Activity(), ...] The inputs are lists of tracks and/or activities. An object is a spatial bounding box with a category label. A track is a spatiotemporal bounding box with a category label, such that the box contains the same instance of an object. An activity is one or more tracks with a start and end frame for an activity performed by the object instances. Track and activity timing must be relative to the start frame of the Scene() constructor. 
""" def __init__(self, filename=None, url=None, framerate=30.0, array=None, colorspace=None, category=None, tracks=None, activities=None, attributes=None, startframe=None, endframe=None, startsec=None, endsec=None): self._tracks = {} self._activities = {} super().__init__(url=url, filename=filename, framerate=framerate, attributes=attributes, array=array, colorspace=colorspace, category=category, startframe=startframe, endframe=endframe, startsec=startsec, endsec=endsec) # Tracks must be defined relative to the clip specified by this constructor if tracks is not None: tracks = tracks if isinstance(tracks, list) or isinstance(tracks, tuple) else [tracks] # canonicalize assert all([isinstance(t, vipy.object.Track) for t in tracks]), "Invalid track input; tracks=[vipy.object.Track(), ...]" self._tracks = {t.id():t for t in tracks} # Activities must be defined relative to the clip specified by this constructor if activities is not None: activities = activities if isinstance(activities, list) or isinstance(activities, tuple) else [activities] # canonicalize assert all([isinstance(a, vipy.activity.Activity) for a in activities]), "Invalid activity input; activities=[vipy.activity.Activity(), ...]" self._activities = {a.id():a for a in activities} self._currentframe = None # deprecated @classmethod def cast(cls, v, flush=False): """Cast a conformal vipy object to this class. This is useful for downcast and upcast conversion of video objects.""" assert isinstance(v, vipy.video.Video), "Invalid input - must be derived from vipy.video.Video" if v.__class__ != vipy.video.Scene: v.__class__ = vipy.video.Scene v._tracks = {} if flush or not hasattr(v, '_tracks') else v._tracks v._activities = {} if flush or not hasattr(v, '_activities') else v._activities v._category = None if flush or not hasattr(v, '_category') else v._category return v @classmethod def asjson(cls, s): """Restore an object serialized with self.json(). Alas for `vipy.video.Scene.from_json`. Usage: ```python vs = vipy.video.Scene.asjson(v.json()) ``` """ return vipy.video.Scene.from_json(s) @classmethod def from_json(cls, s): """Restore an object serialized with self.json() Usage: ```python vs = vipy.video.Scene.from_json(v.json()) ``` """ d = json.loads(s) if not isinstance(s, dict) else s d = {k.lstrip('_'):v for (k,v) in d.items()} # prettyjson (remove "_" prefix to attributes) v = super().from_json(s) # Packed attribute storage: # - When loading a large number of vipy objects, the python garbage collector slows down signficantly due to reference cycle counting # - Mutable objects and custom containers are tracked by the garbage collector and the more of them that are loaded the longer GC takes # - To avoid this, load attributes as tuples of packed strings. This is an immutable type that is not reference counted. Check this with gc.is_tracked() # - Then, unpack load the attributes on demand when accessing tracks() or activities(). Then, the nested containers are reference counted (even though they really should not since there are no cycles by construction) # - This is useful when calling vipy.util.load(...) 
on archives that contain hundreds of thousands of objects # - Do not access the private attributes self._tracks and self._activities as they will be packed until needed # - Should install ultrajson (pip install ujson) for super fast parsing v._tracks = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['tracks'].values()]) # track ID key is embedded in object, legacy unpack of doubly JSON encoded strings (vipy-1.11.16) v._activities = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['activities'].values()]) # track ID key is embedded in object, legacy unpack of doubly JSON encoded strings (vipy-1.11.16) return v def pack(self): """Packing a scene returns the scene with the annotations JSON serialized. - This is useful for fast garbage collection when there are many objects in memory - This is useful for distributed processing prior to serializing from a scheduler to a client - This is useful for lazy deserialization of complex attributes when loading many videos into memory - Unpacking is transparent to the end user and is performed on the fly when annotations are accessed. There is no unpack() method. - See the notes in from_json() for why this helps with nested containers and reference cycle tracking with the python garbage collector """ d = json.loads(self.json()) self._tracks = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['tracks'].values()]) # efficient garbage collection: store as a packed string to avoid reference cycle tracking, unpack on demand self._activities = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['activities'].values()]) # efficient garbage collection: store as a packed string to avoid reference cycle tracking, unpack on demand return self def __repr__(self): strlist = [] if self.isloaded(): strlist.append("height=%d, width=%d, frames=%d, color=%s" % (self.height(), self.width(), len(self._array), self.colorspace())) if self.filename() is not None: strlist.append('filename="%s"' % (self.filename())) if self.hasurl(): strlist.append('url="%s"' % self.url()) if self._framerate is not None: strlist.append('fps=%1.1f' % float(self._framerate)) if not self.isloaded() and self._startframe is not None and self._endframe is not None: strlist.append('clip=(%d,%d)' % (self._startframe, self._endframe)) if not self.isloaded() and self._startframe is not None and self._endframe is None: strlist.append('clip=(%d,)' % (self._startframe)) if self.category() is not None: strlist.append('category="%s"' % self.category()) if self.hastracks(): strlist.append('tracks=%d' % len(self._tracks)) if self.hasactivities(): strlist.append('activities=%d' % len(self._activities)) return str('<vipy.video.scene: %s>' % (', '.join(strlist))) def instanceid(self, newid=None): """Return an annotation instance identifier for this video. An instance ID is a unique identifier for a ground truth annotation within a video, either a track or an activity. More than one instance ID may share the same video ID if they are from the same source video file. This is useful when calling `vipy.video.Scene.activityclip` or `vipy.video.Scene.activitysplit` to clip a video into segments such that each clip has a unique identifier, but all share the same underlying `vipy.video.Video.videoid`. The same applies when calling `vipy.video.Scene.trackclip` or `vipy.video.Scene.tracksplit`.
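For example, a minimal sketch (the filename is illustrative and assumes the video has annotated activities):

```python
v = vipy.video.Scene(filename='/path/to/video.mp4')  # illustrative path
clips = v.activityclip()                             # one clip per annotated activity
iids = [c.instanceid() for c in clips]               # e.g. ['VIDEOID_0', 'VIDEOID_1', ...]
```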
Returns: INSTANCEID: if 'instance_id' key is in self.attributes VIDEOID_INSTANCEID: if '_instance_id' key is in self.attributes, as set by activityclip() or trackclip(). This is set using INSTANCEID=VIDEOID_ACTIVITYINDEX or INSTANCEID=VIDEOID_TRACKINDEX, where the index is the temporal order of the annotation in the source video prior to clip(). VIDEOID_ACTIVITYINDEX: if 'activityindex' key is in self.attributes, as set by activityclip() (fallback for legacy datasets). VIDEOID: otherwise """ if newid is not None: self.setattribute('instance_id', newid) return self else: if 'instance_id' in self.attributes: return self.attributes['instance_id'] # set at video creation time (e.g. pycollector) elif '_instance_id' in self.attributes: return self.attributes['_instance_id'] # set at activityclip() time for provenance from clips back to videos elif 'activityindex' in self.attributes: return '%s_%s' % (self.videoid(), str(self.attributes['activityindex'])) # set at activityclip() time for provenance from clips back to videos (deprecated) else: return self.videoid() def frame(self, k=0, img=None, noimage=False, t=None): """Return `vipy.image.Scene` object at frame k - The attributes of each of the `vipy.image.Scene.objects` in the scene contain helpful metadata for the provenance of the detection, including: - 'trackid' of the track associated with this detection - 'activityid' associated with this detection - 'jointlabel' of this detection, used for visualization - 'noun verb' of this detection, used for visualization Args: k: [int >= 0] The frame index requested. This is relative to the current frame rate of the video. img: [numpy, None] An optional image to be used for this frame. This is useful to construct frames efficiently for videos if the pixel buffer is already available from a stream rather than a preview. noimage: [bool] If True, then return only annotations at frame k with an empty frame buffer (e.g. no image pixels in the returned image object) t: [float >= 0] The frame time requested. This is converted into a frame index using the current framerate of the video. Returns: A `vipy.image.Scene` object for frame k containing all objects in this frame, with pixels unless noimage=True .. note:: - Modifying this frame will not affect the source video - If multiple objects are associated with an activity and a primary actor is defined, then only the primary actor is displayed as "Noun Verbing", objects are shown as "Noun" with the activityid in the attribute - If a noun is associated with more than one activity, then this is shown as "Noun Verbing1\nNoun Verbing2", with a newline separator """ k = int(self.framerate()*t) if t is not None else k assert isinstance(k, int) and k>=0, "Frame index must be a non-negative integer" assert img is not None or (self.isloaded() and k<len(self)) or not self.isloaded(), "Invalid frame index %d - Indexing video by frame must be integer within (0, %d)" % (k, len(self)-1) img = img if (img is not None or noimage) else (self._array[k] if self.isloaded() else self.preview(k).array()) dets = [t[k].clone(deep=True).setattribute('trackindex', j) for (j, t) in enumerate(self.tracklist()) if len(t)>0 and (t.during(k) or t.boundary()=='extend')] # track interpolation (cloned) with boundary handling for d in dets: d.attributes['activityid'] = [] # reset jointlabel = [(d.shortlabel(),'')] # [(Noun, Verbing1), (Noun, Verbing2), ...], initialized with empty verbs as [(Noun, ""), ...
] activityconf = [None] # for display for (aid, a) in self.activities().items(): # insertion order: First activity is primary, next is secondary (not in confidence order) if a.hastrack(d.attributes['trackid']) and a.during(k): # Display assumptions: # - Jointlabel is always displayed as "Noun Verbing" during activity (e.g. Person Carrying, Vehicle Turning) using noun=track shortlabel, verb=activity shortlabel # - If a noun is associated with more than one activity, then this is shown as "Noun Verbing1\nNoun Verbing2", with a newline separator # - If multiple objects are associated with an activity and a primary actor is defined, then only the primary actor is displayed as "Noun Verbing", objects are shown as "Noun" with the activityid in the attributes if (a.actorid() is None or (a.actorid() == d.attributes['trackid'])) and not any([a.shortlabel() == v for (n,v) in jointlabel]): jointlabel.append( (d.shortlabel(), a.shortlabel()) ) # only show each activity once (even if repeated) activityconf.append(a.confidence()) d.attributes['activityid'].append(a.id()) # for activity correspondence (if desired) # For display purposes # - See `vipy.image.mutator_show_trackindex_verbonly` # - Double prepended underscore attributes are private and cleaned using `vipy.image.Image.sanitize` d.attributes['__jointlabel'] = '\n'.join([('%s %s' % (n,v)).strip() for (n,v) in jointlabel[0 if len(jointlabel)==1 else 1:]]) # to be deprecated d.attributes['__noun verb'] = jointlabel[0 if len(jointlabel)==1 else 1:] d.attributes['__activityconf'] = activityconf[0 if len(jointlabel)==1 else 1:] d.attributes['__trackindex'] = d.attributes['trackindex'] # trackindex to be replaced with __trackindex d.attributes['__trackid'] = d.attributes['trackid'] # trackid to be replaced with __trackid d.attributes['__activityid'] = d.attributes['activityid'] # activityid to be replaced with __activityid dets.sort(key=lambda d: (d.confidence() if d.confidence() is not None else 0, d.shortlabel())) # layering in video is ordered by decreasing track confidence and alphabetical shortlabel return vipy.image.Scene(array=img, colorspace=self.colorspace(), objects=dets, category=self.category()) def during(self, frameindex): try: self.__getitem__(frameindex) # triggers load return True except: return False def labeled_frames(self): """Iterate over frames, yielding tuples (activity+object labelset in scene, vipy.image.Scene())""" self.load() for k in range(0, len(self)): #self._currentframe = k # used only for incremental add() yield (self.labels(k), self.__getitem__(k)) #self._currentframe = None def framecomposite(self, n=2, dt=10, mindim=256): """Generate a single composite image with minimum dimension mindim as the uniformly blended composite of n frames each separated by dt frames""" if not self.isloaded(): self.mindim(mindim).load() imframes = [self.frame(k).maxmatte() for k in range(0, dt*n, dt)] img = np.uint8(np.sum([1/float(n)*im.array() for im in imframes], axis=0)) return imframes[0].clone().array(img) def isdegenerate(self): """Degenerate scene has empty or malformed tracks""" return len(self.tracklist()) == 0 or any([t.isempty() or t.isdegenerate() for t in self.tracklist()]) def quicklook(self, n=9, dilate=1.5, mindim=256, fontsize=10, context=False, startframe=0, animate=False, dt=30, thumbnail=None): """Generate a montage of n uniformly spaced annotated frames centered on the union of the labeled boxes in the current frame to show the activity occurring in this scene at a glance. The montage fills rowwise with the n uniformly spaced frames, starting from frame zero and ending on the last frame. This quicklook is most useful when len(self.activities())==1 for generating a quicklook from an activityclip(). Args: n: [int] Number of images in the quicklook dilate: [float] The dilation factor for the bounding box prior to crop for display mindim: [int] The minimum dimension of each of the elements in the montage fontsize: [int] The size of the font for the bounding box label context: [bool] If true, replace the first and last frame in the montage with the full frame annotation, to help show the scale of the scene animate: [bool] If true, return a video constructed by animating the quicklook into a video by showing dt consecutive frames dt: [int] The number of frames for animation startframe: [int] The initial frame index to start the n uniformly sampled frames for the quicklook thumbnail: [`vipy.image.Image`] If provided, prepend this thumbnail as the first element of the montage. This is useful for showing a high resolution image (e.g. a face, small object) to be contained in the video for review. """ if not self.isloaded(): self.load() # triggers load() into memory, user should self.flush() to free if animate: return Video(frames=[self.quicklook(n=n, dilate=dilate, mindim=mindim, fontsize=fontsize, context=context, startframe=k, animate=False, dt=dt, thumbnail=thumbnail) for k in range(0, min(dt, len(self)))], framerate=self.framerate()) f_mutator = vipy.image.mutator_show_jointlabel() framelist = [min(int(np.round(f))+startframe, len(self)-1) for f in np.linspace(0, len(self)-1, n)] isdegenerate = [self.frame(k).boundingbox() is None or self.frame(k).boundingbox().dilate(dilate).intersection(self.framebox(), strict=False).isdegenerate() for (j,k) in enumerate(framelist)] imframes = [self.frame(k).maxmatte() # letterbox or pillarbox if (isdegenerate[j] or (context is True and (j == 0 or j == (n-1)))) else self.frame(k).padcrop(self.frame(k).boundingbox().dilate(dilate).imclipshape(self.width(), self.height()).maxsquare().int()).mindim(mindim, interp='nearest') for (j,k) in enumerate(framelist)] imframes = [f_mutator(im) for im in imframes] # show jointlabel from frame interpolation imframes = [im.savefig(fontsize=fontsize, figure=1).rgb() for im in imframes] # temp storage in memory if thumbnail is not None: assert isinstance(thumbnail, vipy.image.Image) imframes = [thumbnail.maxmatte().mindim(mindim)] + imframes return vipy.visualize.montage(imframes, imgwidth=mindim, imgheight=mindim) def tracks(self, tracks=None, id=None): """Return mutable dictionary of tracks. Args: tracks: [dict] If provided, replace track dictionary with provided track dictionary, and return self id: [str] If provided, return just the track associated with the provided track id Returns: This object if tracks!=None, otherwise the requested track (if id!=None) or trackdict (tracks=None) """ if isinstance(self._tracks, tuple): self._tracks = {t.id():t for t in [vipy.object.Track.from_json(json.loads(s)) for s in self._tracks]} # on-demand unpack (efficient garbage collection for large list of objects) if tracks is None and id is None: return self._tracks # mutable dict elif id is not None: return self._tracks[id] elif isinstance(tracks, dict): assert all([isinstance(t, vipy.object.Track) and k == t.id() for (k,t) in tracks.items()]), "Invalid input - Must be dictionary of vipy.object.Track" self._tracks = tracks.copy() # shallow copy return self else: assert all([isinstance(t, vipy.object.Track) for t in tolist(tracks)]), "Invalid input - 
Must be vipy.object.Track or list of vipy.object.Track" self._tracks = {t.id():t for t in tolist(tracks)} # insertion order preserved (python >=3.6) return self def track(self, id): return self.tracks(id=id) def trackindex(self, id): """Return the index in the tracklist of the track with the provided track id""" assert id in self.tracks() return [t.id() for t in self.tracklist()].index(id) def trackidx(self, idx): """Return the track at the specified index in the tracklist""" return self.tracklist()[idx] def activity(self, id=None): return self.activities(id=id) if id is not None else self.primary_activity() def next_activity(self, id): """Return the next activity just after the given activityid""" assert id in self.activities() A = self.activitylist() k = [k for (k,a) in enumerate(A) if a.id() == id][0] return A[k+1] if k<len(A)-1 else None def prev_activity(self, id): """Return the previous activity just before the given activityid""" assert id in self.activities() A = self.activitylist() k = [k for (k,a) in enumerate(A) if a.id() == id][0] return A[k-1] if k>=1 else None def tracklist(self): return list(self.tracks().values()) # triggers shallow copy def objects(self, casesensitive=True): """The objects in a scene are the unique categories of tracks""" return sorted(list(set([t.category() if casesensitive else t.category().lower() for t in self.tracklist()]))) def actorid(self, id=None, fluent=False): """Return or set the actor ID for the video. - The actor ID is the track ID of the primary actor in the scene. This is useful for assigning a role for activities that are performed by the actor. - The actor ID is the first track in the tracklist Args: id: [str] if not None, then use this track ID as the actor fluent: [bool] If true, always return self. This is useful for those cases where the actorid being set is None. Returns: [id=None, fluent=False] the actor ID [id is not None] The video with the actor ID set, only if the ID is found in the tracklist .. note:: Not to be confused with the biometric subject ID. For videos collected with the Visym Collector platform (https://visym.com/collector), the biometric subject ID can be retrieved via `vipy.video.Video.metadata` (e.g. self.metadata()['subject_ids']). """ if id is None: return (next(iter(self.tracks().keys())) if len(self._tracks)>0 else None) if not fluent else self # Python >=3.6 elif id in self._tracks: # Reorder tracks so that id is first idlist = [id] + [ti for ti in self.tracks().keys() if ti != id] self._tracks = {k:self.track(k) for k in idlist} else: warnings.warn('trackid=%s not found in "%s"' % (str(id), str(self))) return self def setactorid(self, id): """Alias for `vipy.video.Scene.actorid`""" return self.actorid(id, fluent=True) def actor(self): """Return the primary actor (first `vipy.object.Track`) in the video""" return next(iter(self.tracks().values())) if len(self._tracks)>0 else None # Python >=3.6 def primary_activity(self): """Return the primary activity of the video. - The primary activity is the first activity in the activitylist.
- This is useful for activityclip() videos that are centered on a single activity Returns: `vipy.activity.Activity` that is first in the `vipy.video.Scene.activitylist` """ return next(iter(self.activities().values())) if len(self._activities)>0 else None # Python >=3.6 def first_activity(self): """Return the first activity of the video with the earliest start frame""" return sorted(self.activitylist(), key=lambda a: a.startframe())[0] if len(self._activities)>0 else None def last_activity(self): """Return the last activity of the video with the latest end frame""" return sorted(self.activitylist(), key=lambda a: a.endframe())[-1] if len(self._activities)>0 else None def activities(self, activities=None, id=None): """Return mutable dictionary of activities. All temporal alignment is relative to the current clip().""" if isinstance(self._activities, tuple): self._activities = {a.id():a for a in [vipy.activity.Activity.from_json(json.loads(s)) for s in self._activities]} # on-demand if activities is None and id is None: return self._activities # mutable dict elif id is not None: return self._activities[id] elif isinstance(activities, dict): assert all([isinstance(a, vipy.activity.Activity) and k == a.id() for (k,a) in activities.items()]), "Invalid input - Must be dictionary of vipy.activity.Activity" self._activities = activities.copy() # shallow copy return self else: assert all([isinstance(a, vipy.activity.Activity) for a in tolist(activities)]), "Invalid input - Must be vipy.activity.Activity or list of vipy.activity.Activity" self._activities = {a.id():a for a in tolist(activities)} # insertion order preserved (python >=3.6) return self def activityindex(self, k): """Return the `vipy.activity.Activity` at the requested index order in the video""" alist = self.activitylist() assert k >= 0 and k < len(alist), "Invalid index" return alist[k] def activitylist(self): """Return a list of activities in the video, returned in insertion order. Returns: A list of `vipy.activity.Activity` insertion ordered into the original video .. note:: The order of the activitylist() will not match the order of activityclip(), which is sorted by activity startframe. To match, use sorted(activitylist, key=lambda a: a.startframe()) """ return list(self.activities().values()) # insertion ordered (python >=3.6), triggers shallow copy def activityfilter(self, f): """Apply boolean lambda function f to each activity and keep activity if function is true, remove activity if function is false Filter out all activities longer than 128 frames ```python vid = vid.activityfilter(lambda a: len(a)<128) ``` Filter out activities with category in set ```python vid = vid.activityfilter(lambda a: a.category() in set(['category1', 'category2'])) ``` Args: f: [lambda] a lambda function that takes an activity and returns a boolean Returns: This video with the activities f(a)==False removed. """ assert callable(f) self._activities = {k:a for (k,a) in self.activities().items() if f(a) == True} return self def trackfilter(self, f, activitytrack=True): """Apply lambda function f to each object and keep if filter is True. Args: activitytrack: [bool] If true, remove track assignment from activities also, may result in activities with no tracks f: [lambda] The lambda function to apply to each track t, and if f(t) returns True, then keep the track Returns: self, with tracks removed in-place .. note:: Applying track filter with activitytrack=True may result in activities with no associated tracks. 
You should follow up with self.activityfilter(lambda a: len(a.trackids()) > 0). """ assert callable(f) self._tracks = {k:t for (k,t) in self.tracks().items() if f(t) == True} if activitytrack: self.activitymap(lambda a: a.trackfilter(lambda ti: ti in self._tracks)) # remove track association in activities #if any([len(a.tracks()) == 0 for a in self.activitylist()]): # warnings.warn('trackfilter(..., activitytrack=True) removed tracks which returned at least one degenerate activity with no tracks') return self def trackmap(self, f, strict=True): """Apply lambda function f to each track - strict=True: enforce that the lambda function must return non-degenerate Track() objects """ assert callable(f) self._tracks = {k:f(t) for (k,t) in self.tracks().items()} assert all([isinstance(t, vipy.object.Track) and (strict is False or not t.isdegenerate()) for (tk,t) in self.tracks().items()]), "Lambda function must return non-degenerate vipy.object.Track()" return self def activitymap(self, f): """Apply lambda function f to each activity""" assert callable(f) self._activities = {k:f(a) for (k,a) in self.activities().items()} assert all([isinstance(a, vipy.activity.Activity) for a in self.activitylist()]), "Lambda function must return vipy.activity.Activity()" return self def rekey(self, tracks=None, activities=None): """Change the track and activity IDs to randomly assigned UUIDs. Useful for cloning unique scenes. ```python v = vipy.video.RandomScene() v.rekey() # randomly rekey all track and activity IDs v.rekey(tracks={...}) # rekey tracks (oldkey -> newkey) according to dictionary, randomly rekey activities v.rekey(tracks={...}, activities={}) # rekey tracks according to dict, no change to activities ``` Args: tracks: [dict] If not None, use this dictionary to remap oldkey->newkey for tracks. If None, use random keys. If empty dict, no change (do not rekey tracks) activities: [dict] If not None, use this dictionary to remap oldkey->newkey for activities. If None, use random keys. If empty dict, no change (do not rekey activities) Returns: This object, with all track IDs and activity IDs rekeyed as specified. All actor IDs in activities will be updated. """ assert activities is None or isinstance(activities, dict) and all([k in self.activities() for k in activities.keys()]) assert tracks is None or isinstance(tracks, dict) and all([k in self.tracks() for k in tracks.keys()]) d_old_to_new = {k:hex(int(uuid.uuid4().hex[0:8], 16))[2:] for (k,a) in self.activities().items()} if activities is None else activities self._activities = dict([(d_old_to_new[k], a.id(d_old_to_new[k])) if k in d_old_to_new else (k,a) for (k,a) in self.activities().items()]) d_old_to_new = {k:hex(int(uuid.uuid4().hex[0:8], 16))[2:] for (k,t) in self.tracks().items()} if tracks is None else tracks self._tracks = dict([(d_old_to_new[k], t.id(d_old_to_new[k])) if k in d_old_to_new else (k,t) for (k,t) in self.tracks().items()]) for (k,v) in d_old_to_new.items(): self.activitymap(lambda a: a.replaceid(k,v) ) return self def annotation(self): """Return an iterator over annotations in each frame. ```python for y in self.annotation(): for (bb,a) in y: print((bb,a)) ``` Yields: for each frame, yield the tuple: ( (`vipy.object.Detection`, (tuple of `vipy.activity.Activity` performed by the actor in this bounding box)), ... ) .. note:: The preferred method for accessing annotations is a frame iterator, which includes pixels. However, this method provides access to just the annotations without pixels.
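A sketch of consuming the yielded structure, handling frames where a track is not present (the detection is None), where v is a `vipy.video.Scene` with annotated tracks:

```python
for framelist in v.annotation():
    for (bb, activities) in framelist:      # one tuple per track, per frame
        if bb is not None:                   # track not present at this frame otherwise
            print(bb.category(), [a.category() for a in activities])
```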
""" endframe = max([a.endframe() for a in self.activitylist()]+[t.endframe() for (tk,t) in self.tracks().items()]) if (len(self._tracks) > 0 or len(self._activities) > 0) else 0 for k in range(0,endframe): yield tuple( [tuple( [t[k] if t.during(k) else None, tuple( [a for a in self.activitylist() if a.during(k) and a.hastrackoverlap(t)] ) ]) for t in self.tracklist()]) def label(self): """Return an iterator over labels in each frame""" endframe = max([a.endframe() for a in self.activitylist()]+[t.endframe() for (tk,t) in self.tracks().items()]) if (len(self._tracks) > 0 or len(self._activities) > 0) else 0 for k in range(0,endframe): yield self.labels(k) def labels(self, k=None): """Return a set of all object and activity labels in this scene, or at frame int(k)""" return self.activitylabels(k).union(self.objectlabels(k)) def activitylabel(self, startframe=None, endframe=None): """Return an iterator over activity labels in each frame, starting from startframe and ending when there are no more activities""" endframe = endframe if endframe is not None else (max([a.endframe() for a in self.activitylist()]) if len(self.activities())>0 else 0) startframe = startframe if startframe is not None else (min([a.startframe() for a in self.activitylist()]) if len(self.activities())>0 else 0) assert startframe <= endframe for k in range(startframe, endframe): yield self.activitylabels(k) def activitylabels(self, startframe=None, endframe=None): """Return a set of all activity categories in this scene, or at startframe, or in range [startframe, endframe]""" if startframe is None: return set([a.category() for a in self.activities().values()]) elif startframe is not None and endframe is None: return set([a.category() for a in self.activities().values() if a.during(startframe)]) elif startframe is not None and endframe is not None: return [set([a.category() for a in self.activities().values() if a.during(k)]) for k in range(startframe, endframe)] else: raise ValueError('Invalid input - must specify both startframe and endframe, or only startframe') def objectlabels(self, k=None, lower=False): """Return a python set of all activity categories in this scene, or at frame k. Args: k: [int] The object labels present at frame k. If k=None, then all object labels in the video lower: [bool] If true, return the object labels in alll lower case for case invariant string comparisonsn """ return set([t.category() if not lower else t.category().lower() for t in self.tracks().values() if k is None or t.during(k)]) def categories(self): """Alias for labels()""" return self.labels() def activity_categories(self): """Alias for activitylabels()""" return self.activitylabels() def hasactivities(self): """Does this video have any activities?""" return len(self._activities) > 0 def hasactivity(self, activityid): """Does this video have this activity id?""" return activityid in self.activities() def hastracks(self): """Does this video have any tracks?""" return len(self._tracks) > 0 def hastrack(self, trackid): """Does the video have this trackid? .. note:: Track IDs are available as vipy.object.Track().id() """ return trackid in self.tracks() def add(self, obj, category=None, attributes=None, rangecheck=True, frame=None, fluent=False): """Add the object obj to the scene, and return an index to this object for future updates This function is used to incrementally build up a scene frame by frame. Obj can be one of the following types: - obj = vipy.object.Detection(), this must be called from within a frame iterator (e.g. 
for im in video) to get the current frame index - obj = vipy.object.Track() - obj = vipy.activity.Activity() - obj = [xmin, ymin, width, height], with associated category kwarg, this must be called from within a frame iterator to get the current frame index. It is recommended that the objects are added as follows. For a v=vipy.video.Scene(): ```python for im in v: # Do some processing on frame im to detect objects (object_labels, xywh) = object_detection(im) # Add them to the scene, note that each object instance is independent in each frame, use tracks for object correspondence for (lbl,bb) in zip(object_labels, xywh): v.add(bb, lbl) # Do some correspondences to track objects t2 = v.add( vipy.object.Track(...) ) # Update a previous track to add a keyframe v.track(t2).add( ... ) ``` The frame iterator will keep track of the current frame in the video and add the objects in the appropriate place. Alternatively, ```python v.add(vipy.object.Track(...), frame=k) ``` Args: obj: A conformal python object to add to the scene (`vipy.object.Detection`, `vipy.object.Track`, `vipy.activity.Activity`, [xmin, ymin, width, height]) category: Used if obj is an xywh tuple attributes: Used only if obj is an xywh tuple frame: [int] The frame to add the object rangecheck: [bool] If true, check if the object is within the image rectangle and throw an exception if not. This requires introspecting the video shape using `vipy.video.Video.shape`. fluent: [bool] If true, return self instead of the object index """ if isinstance(obj, vipy.object.Detection): assert frame is not None, "add() for vipy.object.Detection() must be added during frame iteration (e.g. for im in video: )" k = frame if obj.hasattribute('trackid') and obj.attributes['trackid'] in self.tracks(): # The attribute "trackid" is set for a detection when interpolating a track at a frame. 
This is useful for reconstructing a track from previously enumerated detections trackid = obj.attributes['trackid'] self.trackmap(lambda t: t.update(k, obj) if obj.attributes['trackid'] == t.id() else t) return None if not fluent else self else: t = vipy.object.Track(category=obj.category(), keyframes=[k], boxes=[obj], boundary='strict', attributes=obj.attributes, trackid=obj.attributes['trackid'] if obj.hasattribute('trackid') else None, framerate=self.framerate()) if rangecheck and not obj.hasoverlap(width=self.width(), height=self.height()): raise ValueError("Track '%s' does not intersect with frame shape (%d, %d)" % (str(t), self.height(), self.width())) self.tracks()[t.id()] = t # by-reference return t.id() if not fluent else self elif isinstance(obj, vipy.object.Track): if rangecheck and not obj.boundingbox().isinside(vipy.geometry.imagebox(self.shape())): obj = obj.imclip(self.width(), self.height()) # try to clip it, will throw exception if all are bad warnings.warn('[vipy.video.add]: Clipping trackid=%s track="%s" to image rectangle' % (str(obj.id()), str(obj))) if obj.framerate() != self.framerate(): obj.framerate(self.framerate()) # convert framerate of track to framerate of video self.tracks()[obj.id()] = obj # by-reference return obj.id() if not fluent else self elif isinstance(obj, vipy.activity.Activity): if rangecheck and obj.startframe() >= obj.endframe(): raise ValueError("Activity '%s' has invalid (startframe, endframe)=(%d, %d)" % (str(obj), obj.startframe(), obj.endframe())) self.activities()[obj.id()] = obj # by-reference, activity may have no tracks return obj.id() if not fluent else self elif (istuple(obj) or islist(obj)) and len(obj) == 4 and isnumber(obj[0]): assert frame is not None, "add() for obj=xywh must be added at a specific frame" t = vipy.object.Track(category=category, keyframes=[frame], boxes=[vipy.geometry.BoundingBox(xywh=obj)], boundary='strict', attributes=attributes, framerate=self.framerate()) if rangecheck and not t.boundingbox().isinside(vipy.geometry.imagebox(self.shape())): t = t.imclip(self.width(), self.height()) # try to clip it, will throw exception if all are bad warnings.warn('Clipping track "%s" to image rectangle' % (str(t))) self.tracks()[t.id()] = t # by-reference return t.id() if not fluent else self else: raise ValueError('Undefined object type "%s" to be added to scene - Supported types are obj in ["vipy.object.Detection", "vipy.object.Track", "vipy.activity.Activity", "[xmin, ymin, width, height]"]' % str(type(obj))) def delete(self, id): """Delete a given track or activity by id, if present""" return self.trackfilter(lambda t: t.id() != id).activityfilter(lambda a: a.id() != id) def addframe(self, im, frame): """Add im=vipy.image.Scene() into vipy.video.Scene() at given frame. The input image must have been generated using im=self[k] for this to be meaningful, so that trackid can be associated""" assert isinstance(im, vipy.image.Scene), "Invalid input - Must be vipy.image.Scene()" assert im.shape() == self.shape(), "Frame input (shape=%s) must be same shape as video (shape=%s)" % (str(im.shape()), str(self.shape())) # Copy framewise vipy.image.Scene() into vipy.video.Scene(). 
self.numpy()[frame] = im.array() # will trigger copy for bb in im.objects(): self.trackmap(lambda t: t.update(frame, bb) if bb.attributes['trackid'] == t.id() else t) return self def clear(self): """Remove all activities and tracks from this object""" self._activities = {} self._tracks = {} return self def cleartracks(self): self._tracks = {} return self def clearactivities(self): self._activities = {} return self def replace(self, other, frame=None): """Replace tracks and activities with other if activity/track is during frame""" assert isinstance(other, vipy.video.Scene) self.activities([a for a in other.activitylist() if frame is None or a.during(frame)]) self.tracks([t for t in other.tracklist() if frame is None or t.during(frame)]) return self def json(self, encode=True): """Return JSON encoded string of this object. This may fail if attributes contain non-JSON encodable objects""" try: json.loads(json.dumps(self.attributes)) # round trip for the attributes dictionary - this can be any arbitrary object and contents may not be json encodable except: raise ValueError('Video contains non-JSON encodable object in self.attributes dictionary - Try self.sanitize() or clear with self.attributes = {} first') d = json.loads(super().json()) d['tracks'] = {k:t.json(encode=False) for (k,t) in self.tracks().items()} d['activities'] = {k:a.json(encode=False) for (k,a) in self.activities().items()} try: return json.dumps(d) if encode else d except: # Legacy support for non JSON serializable objects (<= vipy.1.9.2) v = self.clone() for (ti, t) in v.tracks().items(): for o in t._keyboxes: vipy.geometry.BoundingBox.cast(o, flush=True) o.float().significant_digits(2) for (ai, a) in v.activities().items(): a._startframe = int(a._startframe) a._endframe = int(a._endframe) return v.json(encode=encode) def csv(self, outfile=None): """Export scene to CSV file format with header. If there are no tracks, this will be empty. """ assert self.load().isloaded() csv = [(self.filename(), # video filename k, # frame number (zero indexed) d.category(), d.shortlabel(), # track category and shortlabel (displayed in caption) ';'.join([self.activities(id=aid).category() for aid in tolist(d.attributes['activityid'])] if 'activityid' in d.attributes else ''), # semicolon separated activity category associated with track d.xmin(), d.ymin(), d.width(), d.height(), # bounding box d.attributes['trackid'], # globally unique track ID ';'.join([aid for aid in tolist(d.attributes['activityid'])] if 'activityid' in d.attributes else '')) # semicolon separated activity ID associated with track for (k,im) in enumerate(self) for d in im.objects()] csv = [('# video_filename', 'frame_number', 'object_category', 'object_shortlabel', 'activity categories(;)', 'xmin', 'ymin', 'width', 'height', 'track_id', 'activity_ids(;)')] + csv return writecsv(csv, outfile) if outfile is not None else csv def framerate(self, fps=None): """Change the input framerate for the video and update frame indexes for all annotations. 
```python fps = self.framerate() self.framerate(fps=15.0) ``` """ if fps is None: return self._framerate elif float(fps) == self._framerate: return self else: assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" fps = float(fps) self._startframe = int(round(self._startframe * (fps/self._framerate))) if self._startframe is not None else self._startframe # __repr__ only self._endframe = int(round(self._endframe * (fps/self._framerate))) if self._endframe is not None else self._endframe # __repr__only self._tracks = {k:t.framerate(fps) for (k,t) in self.tracks().items()} self._activities = {k:a.framerate(fps) for (k,a) in self.activities().items()} if 'fps=' in self._ffmpeg_commandline(): self._update_ffmpeg('fps', fps) # replace fps filter, do not add to it else: self._ffmpeg = self._ffmpeg.filter('fps', fps=fps, round='up') # create fps filter first time self._framerate = fps return self def activitysplit(self, idx=None): """Split the scene into k separate scenes, one for each activity. Do not include overlapping activities. Args: idx: [int],[tuple],[list]. Return only those activities in the provided activity index list, where the activity index is the integer index of the activity in the video. .. note:: This is useful for union() """ vid = self.clone(flushforward=True) if any([(a.endframe()-a.startframe()) <= 0 for a in vid.activities().values()]): warnings.warn('Filtering invalid activity with degenerate lengths: %s' % str([a for a in vid.activities().values() if (a.endframe()-a.startframe()) <= 0])) activities = sorted([a.clone() for a in vid.activities().values() if (a.endframe()-a.startframe()) > 0], key=lambda a: a.startframe()) # only activities with at least one frame, sorted in temporal order tracks = [ [t.clone() for (tid, t) in vid.tracks().items() if a.hastrack(t)] for a in activities] # tracks associated with each activity (may be empty) vid._activities = {} # for faster clone vid._tracks = {} # for faster clone return [vid.clone() .setattribute('_instance_id', ('%s_%d' % (vid.videoid(), k)) if not vid.hasattribute('_instance_id') else vid.getattribute('_instance_id')) .activities(pa) .tracks(t) .setactorid(pa.actorid()) for (k,(pa,t)) in enumerate(zip(activities, tracks)) if idx is None or k in tolist(idx)] def tracksplit(self): """Split the scene into k separate scenes, one for each track. Each scene starts at frame 0 and is a shallow copy of self containing exactly one track. - This is useful for visualization by breaking a scene into a list of scenes that contain only one track. - The attribute '_trackindex' is set in the attributes dictionary to provide provenance for the track relative to the source video .. notes:: Use clone() to create a deep copy if needed. """ return [self.clone(shallow=True).setattribute('_trackindex', k).tracks(t).activityfilter(lambda a: a.hastrack(tk)) for (k,(tk,t)) in enumerate(self.tracks().items())] def trackclip(self): """Split the scene into k separate scenes, one for each track. Each scene starts and ends when the track starts and ends""" return [t.setattribute('_instance_id', '%s_%d_trackclip' % (t.videoid(), k)).clip(t.track(t.actorid()).startframe(), t.track(t.actorid()).endframe()) for (k,t) in enumerate(self.tracksplit())] def activityclip(self, padframes=0, multilabel=True, idx=None, padto=None, padtosec=None): """Return a list of `vipy.video.Scene` objects each clipped to be temporally centered on a single activity, with an optional padframes before and after. 
Args: padframes: [int] for symmetric padding same before and after padframes: [tuple] (int, int) for asymmetric padding before and after padframes: [list[tuples]] [(int, int), ...] for activity specific asymmetric padding. See also padto. multilabel: [bool] include overlapping multilabel secondary activities in each activityclip idx: [int], [tuple], [list]. The indexes of the activities to return, where the index is the integer index order of the activity in the video. Useful for complex videos. padto: [int] padding so that each activity clip is at least padto frames long, with symmetric padding around the activity. padtosec: [float] padding so that each activity clip is at least padtosec seconds long, with symmetric padding around the activity. Returns: A list of `vipy.video.Scene` each cloned from the source video and clipped on one activity in the scene .. notes:: - The Scene() category is updated to be the activity category of the clip, and only the objects participating in the activity are included. - Clips are returned ordered in the temporal order they appear in the video. - The returned vipy.video.Scene() objects for each activityclip are clones of the video, with the video buffer flushed. - Each activityclip() is associated with each activity in the scene, and includes all other secondary activities that the objects in the primary activity also perform (if multilabel=True). See activityclip().labels(). - Calling activityclip() on activityclip(multilabel=True) will duplicate activities, due to the overlapping secondary activities being included in each clip with an overlap. Be careful! """ assert isinstance(padframes, int) or istuple(padframes) or islist(padframes) vid = self.clone(flushforward=True) if any([(a.endframe()-a.startframe()) <= 0 for a in vid.activities().values()]): warnings.warn('Filtering invalid activity clips with degenerate lengths: %s' % str([a for a in vid.activities().values() if (a.endframe()-a.startframe()) <= 0])) primary_activities = sorted([a.clone() for a in vid.activities().values() if (a.endframe()-a.startframe()) > 0], key=lambda a: a.startframe()) # only activities with at least one frame, sorted in temporal order padframelist = [padframes if istuple(padframes) else (padframes, padframes) for k in range(len(primary_activities))] if not islist(padframes) else padframes tracks = [ [t.clone() for (tid, t) in vid.tracks().items() if a.hastrackoverlap(t)] for a in primary_activities] # tracks associated with and temporally overlapping each primary activity (may be empty) secondary_activities = [[sa.clone() for sa in primary_activities if (sa.id() != pa.id() and pa.clone().temporalpad((prepad, postpad)).hasoverlap(sa) and (len(T)==0 or any([sa.hastrack(t) for t in T])))] for (pa, T, (prepad,postpad)) in zip(primary_activities, tracks, padframelist)] # overlapping secondary activities that includes any track in the primary activity secondary_activities = [sa if multilabel else [] for sa in secondary_activities] vid._activities = {} # for faster clone vid._tracks = {} # for faster clone maxframes = self.duration_in_frames() if (padframes != 0 or padto is not None or padtosec is not None) else None if padto is not None or padtosec is not None: cliplist = [(a.startframe(), a.endframe()) for a in primary_activities] padto = padto if padto is not None else int(round(padtosec*self.framerate())) padframelist = [(sp+int(np.ceil(((padto-(ef-sf))/2))), ep+int(np.ceil(((padto-(ef-sf))/2)))) if (ef-sf)<padto else (sp,ep) for ((sp,ep),(sf,ef)) in zip(padframelist, 
cliplist)] padframelist = [(0,ep+(-sp)) if (sp<0) else ((sp+(ep-(maxframes-ef)), maxframes-ef) if ((ef+ep)>maxframes) else (sp,ep)) for ((sp,ep),(sf,ef)) in zip(padframelist, cliplist)] # truncate to video boundary return [vid.clone() .activities([pa]+sa) # primary activity first .tracks(t) .clip(startframe=max(pa.startframe()-prepad, 0), endframe=min(pa.endframe()+postpad, (maxframes if maxframes is not None else pa.endframe()+postpad))) .category(pa.category()) .setactorid(pa.actorid()) # actor is actor of primary activity .setattribute('_instance_id', ('%s_%d' % (vid.videoid(), k)) if not vid.hasattribute('_instance_id') else vid.getattribute('_instance_id')) for (k,(pa,sa,t,(prepad,postpad))) in enumerate(zip(primary_activities, secondary_activities, tracks, padframelist)) if (idx is None or k in tolist(idx))] def noactivitylist(self, label=None, shortlabel=None): """Return a list of `vipy.activity.Activity` which are segments of each track with no associated activities. Args: label: [str] The activity label to give the background activities. Defaults to the track category (lowercase) shortlabel: [str] The activity shortlabel to give the background activities when visualized Returns: A list of `vipy.activity.Activity` such that each activity is associated with a track with temporal support where no activities are performed. The union of activitylist() and noactivitylist() should cover the temporal support of all tracks. """ A = [] for t in self.tracklist(): (startframe, endframe) = (t.startframe(), t.startframe()) for k in range(t.startframe(), t.endframe()): if not any([a.hastrack(t) and a.during(k) for a in self.activitylist()]) and k < (t.endframe()-1): endframe = k # background else: if startframe < endframe: A.append(vipy.activity.Activity(label=t.category() if label is None else label, shortlabel='' if label is None else (label if shortlabel is None else shortlabel), startframe=startframe, endframe=endframe, actorid=t.id(), framerate=self.framerate(), attributes={'_noactivitylist':True})) (startframe, endframe) = (k+1,k+1) return A def noactivityclip(self, label=None, shortlabel=None, padframes=0): """Return a list of vipy.video.Scene() each clipped on a track segment that has no associated activities. Args: label: [str] The activity label to give the background activities. Defaults to the track category (lowercase) shortlabel: [str] The activity shortlabel to give the background activities when visualized. Defaults to the track category (lowercase) padframes: [int] The amount of temporal padding to apply to the clips before and after in frames. See `vipy.video.Scene.activityclip` for options. Returns: A list of `vipy.video.Scene` each cloned from the source video and clipped in the temporal region between activities. The union of activityclip() and noactivityclip() should equal the entire video. .. notes:: - Each clip will contain exactly one activity "Background" which is the interval for this track where no activities are occurring - Each clip will be at least one frame long """ A = self.clone().activities(self.noactivitylist(label=label, shortlabel=shortlabel)).activityclip(padframes=padframes, multilabel=False) return [a.setattribute('_instance_id', '%s_bg' % a.getattribute('_instance_id')) for a in A] def trackbox(self, dilate=1.0): """The trackbox is the union of all track bounding boxes in the video, or None if there are no tracks. Args: dilate: [float] A dilation factor to apply to the trackbox before returning. 
See `vipy.geometry.BoundingBox.dilate` Returns: A `vipy.geometry.BoundingBox` which is the union of all boxes in the track (or None if no boxes exist) """ boxes = [t.clone().boundingbox() for t in self.tracklist()] boxes = [bb.dilate(dilate) for bb in boxes if bb is not None] return boxes[0].union(boxes[1:]) if len(boxes) > 0 else None def framebox(self): """Return the bounding box for the image rectangle. Returns: A `vipy.geometry.BoundingBox` which defines the image rectangle .. notes: This requires calling `vipy.video.Video.preview` to get the frame shape from the current filter chain, which touches the video file""" return vipy.geometry.BoundingBox(xmin=0, ymin=0, width=self.width(), height=self.height()) def trackcrop(self, dilate=1.0, maxsquare=False, zeropad=True): """Return the trackcrop() of the scene which is the crop of the video using the `vipy.video.Scene.trackbox`. Args: zeropad: [bool] If True, the zero pad the crop if it is outside the image rectangle, otherwise return only valid pixels inside the image rectangle maxsquare: [bool] If True, make the bounding box the maximum square before cropping dilate: [float] The dilation factor to apply to the trackbox prior to cropping Returns: A `vipy.video.Scene` object from cropping the video using the trackbox. If there are no tracks, return None. """ bb = self.trackbox(dilate) # may be None if trackbox is degenerate return self.crop(bb.maxsquareif(maxsquare), zeropad=zeropad) if bb is not None else None def activitybox(self, activityid=None, dilate=1.0): """The activitybox is the union of all activity bounding boxes in the video, which is the union of all tracks contributing to all activities. This is most useful after activityclip(). The activitybox is the smallest bounding box that contains all of the boxes from all of the tracks in all activities in this video. """ activities = [a for (k,a) in self.activities().items() if (activityid is None or k in set(activityid))] boxes = [t.clone().boundingbox().dilate(dilate) for t in self.tracklist() if any([a.hastrack(t) for a in activities])] return boxes[0].union(boxes[1:]) if len(boxes) > 0 else vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(self.width()), height=int(self.height())) def activitycuboid(self, activityid=None, dilate=1.0, maxdim=256, bb=None): """The activitycuboid() is the fixed square spatial crop corresponding to the activitybox (or supplied bounding box), which contains all of the valid activities in the scene. This is most useful after activityclip(). The activitycuboid() is a spatial crop of the video corresponding to the supplied boundingbox or the square activitybox(). 
This crop is resized such that the maximum dimension is maxdim, since the crop can be tiny and would otherwise not be encodable by ffmpeg """ bb = self.activitybox(activityid).maxsquare() if bb is None else bb assert bb is None or isinstance(bb, vipy.geometry.BoundingBox) assert bb.issquare(), "Add support for non-square boxes" return self.clone().crop(bb.dilate(dilate).int(), zeropad=True).resize(maxdim, maxdim) # crop triggers preview() def activitysquare(self, activityid=None, dilate=1.0, maxdim=256): """The activity square is the maxsquare activitybox that contains only valid (non-padded) pixels interior to the image""" bb = self.activitybox(activityid).maxsquare().dilate(dilate).int().iminterior(self.width(), self.height()).minsquare() return self.activitycuboid(activityid, dilate=1.0, maxdim=maxdim, bb=bb) def activitytube(self, activityid=None, dilate=1.0, maxdim=256): """The activitytube() is a sequence of crops where the spatial box changes on every frame to track the activity. The box in each frame is the square activitybox() for this video which is the union of boxes contributing to this activity in each frame. This function does not perform any temporal clipping. Use activityclip() first to split into individual activities. Crops will be optionally dilated, with zeropadding if the box is outside the image rectangle. All crops will be resized so that the maximum dimension is maxdim (and square by default) """ vid = self.clone().load() # triggers load self.activityfilter(lambda a: activityid is None or a.id() in set(activityid)) # only requested IDs (or all of them) frames = [im.padcrop(im.boundingbox().maxsquare().dilate(dilate).int()).resize(maxdim, maxdim) for im in vid if im.boundingbox() is not None] # track interpolation, for frames with boxes only if len(frames) != len(vid): warnings.warn('[vipy.video.activitytube]: Removed %d frames with no spatial bounding boxes' % (len(vid) - len(frames))) vid.attributes['__activitytube'] = {'truncated':len(vid) - len(frames)} # provenance to reject if len(frames) == 0: warnings.warn('[vipy.video.activitytube]: Resulting video is empty! Setting activitytube to zero') frames = [ vid[0].resize(maxdim, maxdim).zeros() ] # empty frame vid.attributes['__activitytube'] = {'empty':True} # provenance to reject vid._tracks = {ti:vipy.object.Track(keyframes=[f for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], boxes=[d for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], category=t.category(), trackid=ti, framerate=self.framerate()) for (k,(ti,t)) in enumerate(self.tracks().items())} # replace tracks with boxes relative to tube return vid.array(np.stack([im.numpy() for im in frames])) def actortube(self, trackid=None, dilate=1.0, maxdim=256, strict=True): """The actortube() is a sequence of crops where the spatial box changes on every frame to track the primary actor performing an activity. The box in each frame is the square box centered on the primary actor performing the activity, dilated by a given factor (the original box around the actor is unchanged, this just increases the context, with zero padding) This function does not perform any temporal clipping. Use activityclip() first to split into individual activities. 
All crops will be resized so that the maximum dimension is maxdim (and square by default) """ assert trackid is not None or len(self.tracks()) == 1, "Track ID must be provided if there exists more than one track in the scene" trackid = trackid if trackid is not None else list(self.tracks().keys())[0] assert self.hastrack(trackid), "Track ID %s not found - Actortube requires a track ID in the scene (tracks=%s)" % (str(trackid), str(self.tracks())) vid = self.clone().load() # triggers load t = vid.tracks(id=trackid) # actor track frames = [im.padcrop(t[k].maxsquare().dilate(dilate).int()).resize(maxdim, maxdim) for (k,im) in enumerate(vid) if t.during(k)] if len(t)>0 else [] # actor interpolation, padding may introduce frames with no tracks if len(frames) == 0: if not strict: warnings.warn('[vipy.video.actortube]: Empty track for trackid="%s" - Setting actortube to zero' % trackid) frames = [ vid[0].resize(maxdim, maxdim).zeros() ] # empty frame vid.attributes['__actortube'] = {'empty':True} # provenance to reject else: raise ValueError('[vipy.video.actortube]: Empty track for track=%s, trackid=%s' % (str(t), trackid)) vid._tracks = {ti:vipy.object.Track(keyframes=[f for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], # keyframes zero indexed, relative to [frames] boxes=[d for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], # one box per frame category=t.category(), trackid=ti, framerate=self.framerate()) # preserve trackid for (k,(ti,t)) in enumerate(self.tracks().items())} # replace tracks with interpolated boxes relative to tube defined by actor return vid.array(np.stack([im.numpy() for im in frames])) def speed(self, s): """Change the speed by a multiplier s. If s=1, this will be the same speed, s=0.5 for half-speed (slower playback), s=2 for double-speed (faster playback)""" super().speed(s) return self.trackmap(lambda t: t.framerate(speed=s)).activitymap(lambda a: a.framerate(speed=s)) def clip(self, startframe, endframe=None): """Clip the video to between (startframe, endframe). This clip is relative to clip() shown by __repr__(). Args: startframe: [int] the start frame relative to the video framerate() for the clip endframe: [int] the end frame relative to the video framerate for the clip, may be none Returns: This video object, clipped so that a load() will result in frame=0 equivalent to startframe. All tracks and activities updated relative to the new startframe. .. note:: - This returns a clone of the video for idempotence - This does not load the video. This updates the ffmpeg filter chain to temporally trim the video. See self.commandline() for the updated filter chain to run. 
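For example (a sketch; the filename and frame indexes are illustrative):

```python
v = vipy.video.Scene(filename='/path/to/video.mp4', framerate=30.0)
c = v.clip(startframe=30, endframe=90)  # a two second clip starting at t=1s for a 30 fps video
c.load()                                # c[0] now corresponds to frame 30 of the source; annotations are shifted to match
```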
""" assert (endframe is None or startframe <= endframe) and startframe >= 0, "Invalid start and end frames (%s, %s)" % (str(startframe), str(endframe)) v = self.clone() if not v.isloaded(): # -- Copied from super().clip() to allow for clip on clone (for indempotence) # -- This code copy is used to avoid super(Scene, self.clone()) which screws up class inheritance for iPython reload assert not v.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" timestamp_in_seconds = ((v._startframe if v._startframe is not None else 0)+startframe)/float(v.framerate()) v._update_ffmpeg_seek(timestamp_in_seconds) if endframe is not None: v._ffmpeg = v._ffmpeg.setpts('PTS-STARTPTS') # reset timestamp to 0 before trim filter v._ffmpeg = v._ffmpeg.trim(start=0, end=(endframe-startframe)/self.framerate()) # must be in seconds to allow for framerate conversion v._ffmpeg = v._ffmpeg.setpts('PTS-STARTPTS') # reset timestamp to 0 after trim filter v._startframe = startframe if v._startframe is None else v._startframe + startframe # for __repr__ only v._endframe = (v._startframe + (endframe-startframe)) if endframe is not None else v._endframe # for __repr__ only # -- end copy else: endframe = endframe if endframe is not None else len(self._array) v._array = self._array[startframe:endframe] (v._startframe, v._endframe) = (0, endframe-startframe) v._tracks = {k:t.offset(dt=-startframe).truncate(startframe=0, endframe=(endframe-startframe) if endframe is not None else None) for (k,t) in v.tracks().items()} # may be degenerate v._activities = {k:a.offset(dt=-startframe).truncate(startframe=0, endframe=(endframe-startframe) if endframe is not None else None) for (k,a) in v.activities().items()} # may be degenerate return v.trackfilter(lambda t: len(t)>0).activityfilter(lambda a: len(a)>0) # remove degenerate tracks and activities def crop(self, bb, zeropad=True): """Crop the video using the supplied box, update tracks relative to crop, video is zeropadded if box is outside frame rectangle""" assert isinstance(bb, vipy.geometry.BoundingBox), "Invalid input" bb = bb.int() bbc = bb.clone().imclipshape(self.width(), self.height()).int() #if zeropad and bb != bbc: # self.zeropad(bb.width()-bbc.width(), bb.height()-bbc.height()) # bb = bb.offset(bb.width()-bbc.width(), bb.height()-bbc.height()) super().crop(bb, zeropad=zeropad) # range check handled here to correctly apply zeropad bb = bb if zeropad else bbc self._tracks = {k:t.offset(dx=-bb.xmin(), dy=-bb.ymin()) for (k,t) in self.tracks().items()} return self def zeropad(self, padwidth, padheight): """Zero pad the video with padwidth columns before and after, and padheight rows before and after Update tracks accordingly. 
""" assert isinstance(padwidth, int) and isinstance(padheight, int) super().zeropad(padwidth, padheight) self._tracks = {k:t.offset(dx=padwidth, dy=padheight) for (k,t) in self.tracks().items()} return self def fliplr(self): (H,W) = self.shape() # yuck, need to get image dimensions before filter self._tracks = {k:t.fliplr(H,W) for (k,t) in self.tracks().items()} super().fliplr() return self def flipud(self): assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" (H,W) = self.shape() # yuck, need to get image dimensions before filter self._tracks = {k:t.flipud(H,W) for (k,t) in self.tracks().items()} super().flipud() return self def rot90ccw(self): assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" (H,W) = self.shape() # yuck, need to get image dimensions before filter self._tracks = {k:t.rot90ccw(H,W) for (k,t) in self.tracks().items()} super().rot90ccw() return self def rot90cw(self): assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" (H,W) = self.shape() # yuck, need to get image dimensions before filter self._tracks = {k:t.rot90cw(H,W) for (k,t) in self.tracks().items()} super().rot90cw() return self def resize(self, rows=None, cols=None, width=None, height=None): """Resize the video to (rows, cols), preserving the aspect ratio if only rows or cols is provided""" assert not (rows is not None and height is not None) # cannot be both assert not (cols is not None and width is not None) # cannot be both rows = rows if rows is not None else height cols = cols if cols is not None else width assert rows is not None or cols is not None, "Invalid input" (H,W) = self.shape() # yuck, need to get image dimensions before filter, manually set this prior to calling resize if known sy = rows / float(H) if rows is not None else cols / float(W) sx = cols / float(W) if cols is not None else rows / float(H) self._tracks = {k:t.scalex(sx) for (k,t) in self.tracks().items()} self._tracks = {k:t.scaley(sy) for (k,t) in self.tracks().items()} super().resize(rows=rows, cols=cols) return self def mindim(self, dim=None): """Resize the video so that the minimum of (width,height)=dim, preserving aspect ratio""" (H,W) = self.shape() # yuck, need to get image dimensions before filter return min(self.shape()) if dim is None else (self if min(H,W) == dim else (self.resize(cols=dim) if W<H else self.resize(rows=dim))) def maxdim(self, dim=None): """Resize the video so that the maximum of (width,height)=dim, preserving aspect ratio""" assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" (H,W) = self.shape() # yuck, need to get image dimensions before filter return max(H,W) if dim is None else (self.resize(cols=dim) if W>H else self.resize(rows=dim)) def rescale(self, s): """Spatially rescale the scene by a constant scale factor. Args: s: [float] Scale factor > 0 to isotropically scale the image. 
""" assert s == 1 or not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" self._tracks = {k:t.rescale(s) for (k,t) in self.tracks().items()} super().rescale(s) return self def startframe(self): return self._startframe def extrapolate(self, f, dt=None): """Extrapolate the video to frame f and add the extrapolated tracks to the video""" return self.trackmap(lambda t: t.add(f, t.linear_extrapolation(f, dt=dt if dt is not None else self.framerate()), strict=False)) def dedupe(self, spatial_iou_threshold=0.8, dt=5, tracks=True, activities=True, temporal_iou_threshold=0.8, verbose=True): """Find and delete duplicate tracks and activities by overlap. Track deduplication algorithm - For each pair of tracks with the same category, find the larest temporal segment that contains both tracks. - For this segment, compute the IOU for each box interpolated at a stride of dt frames - Compute the mean IOU for this segment. This is the segment IOU. - If the segment IOU is greater than the threshold, merge the shorter of the two tracks with the current track. Activity deduplication algorithm - For each pair of activities in insertion order - If the temporal IOU is greater than the threshold, then merge the older activity (later insertion) with the newer activity (earlier insertion) - Update the actor ID of the merged activity to be that of the newer activity """ if tracks: deleted = set([]) for tj in sorted(self.tracklist(), key=lambda t: len(t), reverse=True): # longest to shortest for (s, ti) in sorted([(0,t) if (len(tj) < len(t) or t.id() in deleted or t.id() == tj.id() or t.category() != tj.category()) else (tj.fragmentiou(t, dt=dt), t) for t in self.tracklist()], key=lambda x: x[0], reverse=True): if s > spatial_iou_threshold: # best mean framewise overlap during overlapping segment of two tracks (ti, tj) if verbose: print('[vipy.video.dedupe]: merging duplicate track "%s" (id=%s) which overlaps with "%s" (id=%s)' % (ti, ti.id(), tj, tj.id())) self.tracks()[tj.id()] = tj.union(ti) # merge self.activitymap(lambda a: a.replace(ti, tj)) # replace merged track reference in activity deleted.add(ti.id()) self.trackfilter(lambda t: t.id() not in deleted) # remove duplicate tracks if activities: deleted = set([]) for (j,aj) in enumerate(self.activitylist()): # preserve insertion order for ai in self.activitylist()[j+1:]: if aj.hasoverlap(ai, threshold=temporal_iou_threshold) and ai.id() not in deleted: if verbose: print('[vipy.video.dedupe]: merging duplicate activity "%s" (id=%s) which overlaps with "%s" (id=%s)' % (ai, ai.id(), aj, aj.id())) self.activities()[aj.id()] = aj.union(ai.clone().replaceid(ai.actorid(), aj.actorid())).addid(ai.actorid()) # merge two activities into one, with two tracks deleted.add(ai.id()) self.activityfilter(lambda a: a.id() not in deleted) # remove duplicate activities return self def combine(self, other, tracks=True, activities=True, rekey=True): """Combine the activities and tracks from both scenes into self. .. note:: This does not perform a union, it simply combines dictionaries. 
For deduplication, see `vipy.video.Scene.union` """ assert isinstance(other, Scene), "Invalid input - must be vipy.video.Scene() object and not type=%s" % str(type(other)) assert self.framerate() == other.framerate() o = other.clone(rekey=True) if rekey else other # make sure keys are unique if activities: self.activities().update(o.activities()) if tracks: self.tracks().update(o.tracks()) return self def union(self, other, temporal_iou_threshold=0.5, spatial_iou_threshold=0.6, strict=True, overlap='average', percentilecover=0.8, percentilesamples=100, activity=True, track=True): """Compute the union of two scenes as the set of unique activities and tracks. A pair of activities or tracks are non-unique if they overlap spatially and temporally by a given IoU threshold. Merge overlapping tracks. Tracks are merged by considering the mean IoU at the overlapping segment of two tracks with the same category greater than the provided spatial_iou_threshold threshold Activities are merged by considering the temporal IoU of the activities of the same class greater than the provided temporal_iou_threshold threshold Args: other: Scene or list of scenes for union. Other may be a clip of self at a different framerate, spatial isotropic scale, or clip offset spatial_iou_threshold: The intersection over union threshold for the mean of the two segments of an overlapping track. Disable by setting to 1.0 temporal_iou_threshold: The intersection over union threshold for a temporal bounding box for a pair of activities to be declared duplicates. Disable by setting to 1.0 strict: Require both scenes to share the same underlying video filename overlap=['average', 'replace', 'keep'] - average: Merge two tracks by averaging the boxes (average=True) if overlapping - replace: merge two tracks by replacing overlapping boxes with other (discard self) - keep: merge two tracks by keeping overlapping boxes with other (discard other) percentilecover: [0,1]: When determining the assignment of two tracks, compute the percentilecover of two tracks by ranking the cover in the overlapping segment and computing the mean of the top-k assignments, where k=len(segment)*percentilecover. percentilesamples: [>1]: the number of samples along the overlapping segment for computing percentile cover activity: [bool]: union() of activities only track: [bool]: union() of tracks only Returns: Updates this scene to include the non-overlapping activities from other. By default, it takes the strict union of all activities and tracks. .. note:: - This is useful for merging scenes computed using a lower resolution/framerate/clipped object or activity detector without running the detector on the high-res scene - This function will preserve the invariance for v == v.clear().union(v.rescale(0.5).framerate(5).activityclip()), to within the quantization error of framerate() downsampling. - percentilecover is a robust method of track assignment when boxes for two tracks (e.g. ground truth and detections) may deform due to occlusion. 
""" assert overlap in ['average', 'replace', 'keep'], "Invalid input - 'overlap' must be in [average, replace, keep]" assert spatial_iou_threshold >= 0 and spatial_iou_threshold <= 1, "invalid spatial_iou_threshold, must be between [0,1]" assert temporal_iou_threshold >= 0 and temporal_iou_threshold <= 1, "invalid temporal_iou_threshold, must be between [0,1]" assert percentilesamples >= 1, "invalid samples, must be >= 1" if not activity and not track: return self # nothing to do sc = self.clone() # do not change self yet, make a copy then merge at the end for o in tolist(other): assert isinstance(o, Scene), "Invalid input - must be vipy.video.Scene() object and not type=%s" % str(type(o)) if strict: assert sc.filename() == o.filename(), "Invalid input - Scenes must have the same underlying video. Disable this with strict=False." oc = o.clone() # do not change other, make a copy # Key collision? if len(set(sc.tracks().keys()).intersection(set(oc.tracks().keys()))) > 0: print('[vipy.video.union]: track key collision - Rekeying other... Use other.rekey() to suppress this warning.') oc.rekey() if len(set(sc.activities().keys()).intersection(set(oc.activities().keys()))) > 0: print('[vipy.video.union]: activity key collision - Rekeying other... Use other.rekey() to suppress this warning.') oc.rekey() # Similarity transform? Other may differ from self by a temporal scale (framerate), temporal translation (clip) or spatial isotropic scale (rescale) assert np.isclose(sc.aspect_ratio(), oc.aspect_ratio(), atol=1E-2), "Invalid input - Scenes must have the same aspect ratio" if sc.width() != oc.width(): oc = oc.rescale(sc.width() / oc.width()) # match spatial scale if not np.isclose(sc.framerate(), oc.framerate(), atol=1E-3): oc = oc.framerate(sc.framerate()) # match temporal scale (video in oc will not match, only annotations) if sc.startframe() != oc.startframe(): dt = (oc.startframe() if oc.startframe() is not None else 0) - (sc.startframe() if sc.startframe() is not None else 0) oc = oc.trackmap(lambda t: t.offset(dt=dt)).activitymap(lambda a: a.offset(dt=dt)) # match temporal translation of tracks and activities oc = oc.trackfilter(lambda t: ((not t.isdegenerate()) and len(t)>0), activitytrack=False) # Merge other tracks into selfclone: one-to-many mapping from self to other merged = {} # dictionary mapping trackid in other to the trackid in self, each track in other can be merged at most once for ti in sorted(sc.tracklist(), key=lambda t: len(t), reverse=True): # longest to shortest for tj in sorted(oc.tracklist(), key=lambda t: len(t), reverse=True): if ti.category() == tj.category() and (tj.id() not in merged) and tj.segment_percentilecover(sc.track(ti.id()), percentile=percentilecover, samples=percentilesamples) > spatial_iou_threshold: # mean framewise overlap during overlapping segment of two tracks sc.tracks()[ti.id()] = sc.track(ti.id()).union(tj, overlap=overlap) # merge duplicate/fragmented tracks from other into self, union() returns clone merged[tj.id()] = ti.id() print('[vipy.video.union]: merging track "%s"(id=%s) + "%s"(id=%s) for scene "%s"' % (str(ti), str(ti.id()), str(tj), str(tj.id()), str(sc))) oc.trackfilter(lambda t: t.id() not in merged, activitytrack=False) # remove duplicate other track for final union # Merge other activities into selfclone: one-to-one mapping for (i,j) in merged.items(): # i=id of other, j=id of self oc.activitymap(lambda a: a.replaceid(i, j) if a.hastrack(i) else a) # update track IDs referenced in activities for merged tracks for (i,ai) in 
sc.activities().items(): for (j,aj) in oc.activities().items(): if ai.category() == aj.category() and set(ai.trackids()) == set(aj.trackids()) and ai.temporal_iou(aj) > temporal_iou_threshold: oc.activityfilter(lambda a: a.id() != j) # remove duplicate activity from final union oc.activityfilter(lambda a: len(a.tracks())>0) # remove empty activities not merged # Union sc.tracks().update(oc.tracks()) sc.activities().update(oc.activities()) # Final union of unique tracks/activities if track: self.tracks(sc.tracklist()) # union of tracks only if activity: self.activities(sc.activitylist()) # union of activities only: may reference tracks not in self if track=False return self def annotate(self, outfile=None, fontsize=10, captionoffset=(0,0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, categories=None, nocaption=False, nocaption_withstring=[], mutator=None, timestamp=None, timestampcolor='black', timestampfacecolor='white', verbose=False): """Generate a video visualization of all annotated objects and activities in the video. The annotation video will be at the resolution and framerate of the underlying video, and pixels in this video will now contain the overlay. This function does not play the video, it only generates annotated video frames. Use show() which is equivalent to annotate().saveas().play() Args: outfile: [str] An optional file to stream the annotation to without storing the annotated video in memory fontsize: [int] The fontsize of bounding box captions, used by matplotlib captionoffset: (tuple) The (x,y) offset relative to the bounding box to place the caption for each box. textfacecolor: [str] The color of the text in the bounding box caption. Must be in `vipy.gui.using_matplotlib.colorlist`. textfacealpha: [float] The transparency of the text in the bounding box caption. Must be in [0,1], where 0=transparent and 1=opaque. shortlabel: [bool] If true, display the shortlabel for each object in the scene, otherwise show the full category boxalpha: [float] The transparency of the box face behind the text. Must be in [0,1], where 0=transparent and 1=opaque. d_category2color: [dict] A dictionary mapping categories of objects in the scene to their box colors. Named colors must be in `vipy.gui.using_matplotlib.colorlist`. categories: [list] Only show these categories, or show them all if None nocaption_withstring: [list]: Do not show captions for those detection categories (or shortlabels) containing any of the strings in the provided list nocaption: [bool] If true, do not show any captions, just boxes mutator: [lambda] A lambda function that will mutate an image to allow for complex visualizations. This should be a mutator like `vipy.image.mutator_show_trackid`. timestamp: [bool] If true, show a semitransparent timestamp (when the annotation occurs, not when the video was collected) with frame number in the upper left corner of the video timestampcolor: [str] The color of the timestamp text. Named colors must be in `vipy.gui.using_matplotlib.colorlist`. timestampfacecolor: [str] The color of the timestamp background. Named colors must be in `vipy.gui.using_matplotlib.colorlist`. verbose: [bool] Show more helpful messages if true Returns: A `vipy.video.Video` with annotations in the pixels. If outfile is provided, then the returned video will be flushed. .. 
note:: In general, this function should not be run on very long videos without the outfile kwarg, as it requires loading the video framewise into memory. """ assert outfile is None or vipy.util.isvideofile(outfile), "Invalid filename extension for annotated video" if verbose: print('[vipy.video.annotate]: Annotating video ...') f_mutator = mutator if mutator is not None else vipy.image.mutator_show_jointlabel() f_timestamp = (lambda k: '%s %d' % (vipy.util.clockstamp(), k)) if timestamp is True else timestamp if outfile is None: assert self.load().isloaded(), "Load() failed" imgs = [f_mutator(self[k].clone(), k).savefig(fontsize=fontsize, captionoffset=captionoffset, textfacecolor=textfacecolor, textfacealpha=textfacealpha, shortlabel=shortlabel, boxalpha=boxalpha, d_category2color=d_category2color, categories=categories, nocaption=nocaption, timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestamp=f_timestamp(k) if timestamp is not None else None, figure=1 if k<(len(self)-1) else None, # cleanup on last frame nocaption_withstring=nocaption_withstring).numpy() for k in range(0, len(self))] # Replace pixels with annotated pixels and downcast object to vipy.video.Video (since there are no more objects to show) return vipy.video.Video(array=np.stack([np.array(PIL.Image.fromarray(img).convert('RGB')) for img in imgs], axis=0), framerate=self.framerate(), attributes=self.attributes) # slow for large videos else: # Stream to output video without loading all frames into memory n = self.duration_in_frames_of_videofile() if not self.isloaded() else len(self) vo = vipy.video.Video(filename=outfile, framerate=self.framerate()) with vo.stream(overwrite=True) as so: for (k,im) in enumerate(self.stream()): so.write(f_mutator(im.clone(), k).savefig(fontsize=fontsize, captionoffset=captionoffset, textfacecolor=textfacecolor, textfacealpha=textfacealpha, shortlabel=shortlabel, boxalpha=boxalpha, d_category2color=d_category2color, categories=categories, nocaption=nocaption, timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestamp=f_timestamp(k) if timestamp is not None else None, figure=1 if k<(n-1) else None, # cleanup on last frame nocaption_withstring=nocaption_withstring).rgb()) return vo def _show(self, outfile=None, verbose=True, fontsize=10, captionoffset=(0,0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, categories=None, nocaption=False, nocaption_withstring=[], notebook=False, timestamp=None, timestampcolor='black', timestampfacecolor='white'): """Generate an annotation video saved to outfile (or tempfile if outfile=None) and show it using ffplay when it is done exporting. Do not modify the original video buffer. 
Returns a clone of the video with the shown annotation.""" return self.clone().annotate(verbose=verbose, fontsize=fontsize, captionoffset=captionoffset, textfacecolor=textfacecolor, textfacealpha=textfacealpha, shortlabel=shortlabel, boxalpha=boxalpha, d_category2color=d_category2color, categories=categories, nocaption=nocaption, timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestamp=timestamp, nocaption_withstring=nocaption_withstring).saveas(outfile).play(notebook=notebook) def show(self, outfile=None, verbose=True, fontsize=10, captionoffset=(0,0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, categories=None, nocaption=False, nocaption_withstring=[], figure=1, fps=None, timestamp=None, timestampcolor='black', timestampfacecolor='white', mutator=None): """Faster show using interactive image show for annotated videos. This can visualize videos before video rendering is complete, but it cannot guarantee frame rates. Large videos with complex scenes will slow this down and will render at lower frame rates.""" fps = min(fps, self.framerate()) if fps is not None else self.framerate() assert fps > 0, "Invalid display framerate" f_timestamp = (lambda k: '%s %d' % (vipy.util.clockstamp(), k)) if timestamp is True else timestamp f_mutator = mutator if mutator is not None else vipy.image.mutator_show_jointlabel() if not self.isdownloaded() and self.hasurl(): self.download() with Stopwatch() as sw: for (k,im) in enumerate(self.load() if self.isloaded() else self.stream()): time.sleep(max(0, (1.0/self.framerate())*int(np.ceil((self.framerate()/fps))))) f_mutator(im,k).show(categories=categories, figure=figure, nocaption=nocaption, nocaption_withstring=nocaption_withstring, fontsize=fontsize, boxalpha=boxalpha, d_category2color=d_category2color, captionoffset=captionoffset, textfacecolor=textfacecolor, textfacealpha=textfacealpha, timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestamp=f_timestamp(k) if timestamp is not None else None, shortlabel=shortlabel) if vipy.globals._user_hit_escape(): break vipy.show.close(figure) return self def thumbnail(self, outfile=None, frame=0, fontsize=10, nocaption=False, boxalpha=0.25, dpi=200, textfacecolor='white', textfacealpha=1.0): """Return annotated frame k of the video, saving the annotation visualization to outfile if provided, otherwise return vipy.image.Scene""" im = self.frame(frame, img=self.preview(framenum=frame).array()) return im.savefig(outfile=outfile, fontsize=fontsize, nocaption=nocaption, boxalpha=boxalpha, dpi=dpi, textfacecolor=textfacecolor, textfacealpha=textfacealpha) if outfile is not None else im def stabilize(self, padheightfrac=0.125, padwidthfrac=0.25, padheightpx=None, padwidthpx=None, gpu=None, outfile=None): """Background stabilization using flow based stabilization masking foreground region. - This will output a video with all frames aligned to the first frame, such that the background is static. - This uses the flow based approach described in `vipy.flow.Flow.stabilize` Args: padheightfrac: [float] The height padding (relative to video height) to be applied to output video to allow for vertical stabilization padwidthfrac: [float] The width padding (relative to video width) to be applied to output video to allow for horizontal stabilization padheightpx: [int] The height padding to be applied to output video to allow for vertical stabilization. Overrides padheightfrac. 
padwidthpx: [int] The width padding to be applied to output video to allow for horizontal stabilization. Overrides padwidthfrac. gpu: [int] The GPU index to use, if opencv has been compiled with GPU support (this is rare) outfile: [str] The output filename to store the stabilized video Returns: A clone of this video with background pixels stabilized to the first frame. .. note:: - If the camera pans outside the image rectangle, increase the padheight or padwidth to make sure that the actor stays inside the stabilized image rectangle - If there are moving actors in the scene, include bounding boxes for each, and these boxes are ignored as keepouts in the flow stabilization """ from vipy.flow import Flow # requires opencv return Flow(flowdim=256, gpu=gpu).stabilize(self.clone(), residual=True, strict=True, padheightfrac=padheightfrac, padwidthfrac=padwidthfrac, padheightpx=padheightpx, padwidthpx=padwidthpx, outfile=outfile) def pixelmask(self, pixelsize=8): """Replace all pixels in foreground boxes with pixelation (e.g. bigger pixels, like privacy glass)""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.pixelmask(pixelsize) # shared numpy array return self def pixelize(self, radius=16): """Alias for pixelmask()""" return self.pixelmask(pixelsize=radius) def pixelate(self, radius=16): """Alias for pixelmask()""" return self.pixelmask(pixelsize=radius) def binarymask(self): """Replace all pixels in foreground boxes with white, zero in background""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.binarymask() # shared numpy array return self def asfloatmask(self, fg=1.0, bg=0.0): """Replace all pixels in foreground boxes with fg, and bg in background, return a copy""" assert self.isloaded() self.numpy() # convert to writeable numpy array, triggers writeable copy array = np.full( (len(self.load()), self.height(), self.width(), 1), dtype=np.float32, fill_value=bg) for (k,im) in enumerate(self): for bb in im.objects(): if bb.hasintersection(im.imagebox()): array[k, int(round(bb._ymin)):int(round(bb._ymax)), int(round(bb._xmin)):int(round(bb._xmax))] = fg # does not need imclip return vipy.video.Video(array=array, framerate=self.framerate(), colorspace='float') def meanmask(self): """Replace all pixels in foreground boxes with mean color""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.meanmask() # shared numpy array return self def fgmask(self): """Replace all pixels in foreground boxes with zero""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.fgmask() # shared numpy array return self def zeromask(self): """Alias for fgmask""" return self.fgmask() def blurmask(self, radius=7): """Replace all pixels in foreground boxes with gaussian blurred foreground""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.blurmask(radius) # shared numpy array return self def downcast(self): """Cast the object to a `vipy.video.Video` class""" self.__class__ = vipy.video.Video return self def merge_tracks(self, dilate_height=2.0, dilate_width=2.0, framedist=5): """Merge tracks if a track endpoint dilated by a fraction overlaps exactly one track startpoint, and the endpoint and startpoint are close enough together temporally. .. note:: - This is useful for continuing tracking when the detection framerate was too low and the assignment falls outside the measurement gate. 
- This will not work for complex scenes, as it assumes that there is exactly one possible continuation for a track. """ merged = set([]) for ti in sorted(self.tracklist(), key=lambda t: t.startframe()): for tj in sorted(self.tracklist(), key=lambda t: t.startframe()): if (tj.id() not in merged) and (ti.id() != tj.id()) and (tj.startframe() >= ti.endframe()) and ((tj.startframe()-ti.endframe()) <= framedist) and (ti.category() == tj.category()): di = ti[ti.endframe()].dilate_height(dilate_height).dilate_width(dilate_width) dj = tj[tj.startframe()] if di.iou(dj) > 0 and not any([di.iou(tk[tj.startframe()]) > 0 for tk in self.tracklist() if (tk.id() not in [ti.id(), tj.id()]) and tk.during(tj.startframe())]): self.tracks()[ti.id()] = ti.union(tj) # Merge tracks that are within gating distance self.delete(tj.id()) # remove merged track merged.add(tj.id()) break return self def assign(self, frame, dets, minconf=0.2, maxhistory=5, activityiou=0.5, trackcover=0.2, trackconfsamples=4, gate=0, activitymerge=True, activitynms=False): """Assign a list of `vipy.object.Detection` object detections and `vipy.activity.Activity` activity detections at frame k to scene tracks and activities by greedy assignment. In-place update. Approach: - This approach is equivalent to greedy, constant velocity SORT tracking (https://arxiv.org/abs/1602.00763) - Individual detections are assigned to tracks using a greedy velocity only track propagation, sorted by `vipy.geometry.BoundingBox.maxcover` and detection confidence within a spatial tracking gate - New tracks are created if the detection is unassigned and above a minimum confidence - Updated tracks resulting from assignment are stored in `vipy.video.Scene.tracks` Args: frame: [int] The frame index to assign the detections into the scene dets: [list] A list of `vipy.object.Detection` or `vipy.activity.Activity` objects as returned from a detector activityiou: [float] the minimum temporal IOU for activity assignment minconf: [float] the minimum confidence for a detection to be considered as a new track maxhistory: [int] the maximum propagation length of a track with no measurements, the frame history used for velocity estimates trackconfsamples: [int] the number of uniformly spaced samples along a track to compute a mean track confidence gate: [int] the gating distance in pixels used for assignment of fast moving detections. Useful for low detection framerates if a detection does not overlap with the track. trackcover: [float] the minimum cover necessary for assignment of a detection to a track activitymerge: [bool] if true, then merge overlapping activity detections of the same track and category, otherwise each activity detection is added as a new detection activitynms: [bool] if true, then perform non-maximum suppression of activity detections of the same actor and category that overlap more than activityiou Returns: This video object with each det assigned to corresponding track or activity. 
""" assert dets is None or all([isinstance(d, vipy.object.Detection) or isinstance(d, vipy.activity.Activity) for d in tolist(dets)]), "invalid input" assert frame >= 0 and minconf >= 0 and minconf <= 1.0 and maxhistory > 0, "invalid input" if dets is None or len(tolist(dets)) == 0: return self dets = tolist(dets) if any([d.confidence() is None for d in dets]): warnings.warn('Removing %d detections with no confidence' % len([d.confidence() is None for d in dets])) dets = [d for d in dets if d.confidence() is not None] objdets = [d for d in dets if isinstance(d, vipy.object.Detection)] activitydets = [d for d in dets if isinstance(d, vipy.activity.Activity)] # Object detection to track assignment if len(objdets) > 0: # Track propagation: Constant velocity motion model for active tracks t_ref = [(t, t.linear_extrapolation(frame, dt=maxhistory, shape=False)) for (k,t) in self.tracks().items() if ((frame - t.endframe()) <= maxhistory)] trackarea = [ti.area() for (t,ti) in t_ref] detarea = [d.area() for d in objdets] # Track assignment: # - Each track is assigned at most one detection # - Each detection is assigned to at most one track. # - Assignment is the highest confidence maximum overlapping detection by cover within tracking gate trackconf = {t.id():t.confidence(samples=trackconfsamples) for (t, ti) in t_ref} assignments = [(t, d.confidence(), d.iou(ti, area=detarea[j], otherarea=trackarea[i]), d.shapeiou(ti, area=detarea[j], otherarea=trackarea[i]), d.maxcover(ti, area=detarea[j], otherarea=trackarea[i]), d) for (i, (t, ti)) in enumerate(t_ref) for (j,d) in enumerate(objdets) if (t.category() == d.category() and (((ti._xmax if ti._xmax < d._xmax else d._xmax) - (ti._xmin if ti._xmin > d._xmin else d._xmin)) > 0 and ((ti._ymax if ti._ymax < d._ymax else d._ymax) - (ti._ymin if ti._ymin > d._ymin else d._ymin)) > 0))] assigned = set([]) posconf = min([d.confidence() for d in objdets]) if len(objdets)>0 else 0 assignments.sort(key=lambda x: (x[1]+posconf)*(x[2]+x[3]+x[4])+trackconf[x[0].id()], reverse=True) # in-place for (t, conf, iou, shapeiou, cover, d) in assignments: if cover > (trackcover if len(t)>1 else 0): # the highest confidence detection within the assignment gate (or any overlap if not yet enough history for velocity estimate) if (t.id() not in assigned and d.id() not in assigned): # not assigned yet, assign it! self.track(t.id()).update(frame, d.clone()) # track assignment! (clone required) assigned.add(t.id()) # cannot assign again to this track assigned.add(d.id()) # mark detection as assigned # Track spawn from unassigned and unexplained detections for (j,d) in enumerate(objdets): if (d.id() not in assigned): if (d.confidence() >= minconf and not any([t.linear_extrapolation(frame, dt=maxhistory, shape=False).maxcover(d, otherarea=detarea[j]) >= 0.7 for (i,(t,ti)) in enumerate(t_ref) if t.category() == d.category()])): gated = [(t, t.linear_extrapolation(frame, dt=maxhistory, shape=False)) for (t,ti) in t_ref if (t.id() not in assigned and t.category() == d.category())] if gate>0 else [] gated = sorted([(t, ti) for (t, ti) in gated if ti.hasintersection(d, gate=gate)], key=lambda x: d.sqdist(x[1])) if len(gated) > 0: self.track(gated[0][0].id()).update(frame, d.clone()) # track assignment! 
(clone required) assigned.add(gated[0][0].id()) assigned.add(d.id()) else: assigned.add(self.add(vipy.object.Track(keyframes=[frame], boxes=[d.clone()], category=d.category(), framerate=self.framerate()), rangecheck=False)) # clone required assigned.add(d.id()) # Activity assignment if len(activitydets) > 0: assert all([d.actorid() in self.tracks() for d in activitydets]), "Invalid activity" assigned = set([]) if activitymerge: minframe = min([a._startframe for a in activitydets]) activities = [a for a in self.activities().values() if a._endframe >= minframe] for d in activitydets: for a in activities: if (a._label == d._label) and (a._actorid == d._actorid) and a.hasoverlap(d, activityiou): a.union(d) # activity assignment assigned.add(d._id) break # assigned, early exit if activitynms: minframe = min([a._startframe for a in activitydets]) activities = sorted([a for a in self.activities().values() if a._endframe >= minframe], key=lambda a: a.confidence(), reverse=True) for d in sorted(activitydets, key=lambda x: x.confidence(), reverse=True): for a in activities: if (a._label == d._label) and (a._actorid == d._actorid) and a.hasoverlap(d, activityiou): assigned.add(a._id if d.confidence()>a.confidence() else d._id) # suppressed for id in assigned: if id in self._activities: del self._activities[id] # suppression, faster than self.activityfilter(lambda a: a.id() in assigned) # Activity construction from unassigned detections for d in activitydets: if d._id not in assigned: self.add(d.clone()) return self
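For illustration, a minimal online tracking sketch using assign() with a hypothetical detector stub (the video path, the detect() function, and the box geometry and confidence are placeholders, not part of the library):

```python
import vipy.video
from vipy.object import Detection

def detect(im):
    """Hypothetical detector stub; a real detector would return per-frame detections with confidences"""
    return [Detection(category='Person', xmin=10, ymin=10, width=100, height=200, confidence=0.9)]

v = vipy.video.Scene(filename='/path/to/video.mp4')  # placeholder path
for (k, im) in enumerate(v.stream()):
    v.assign(k, detect(im), minconf=0.2, maxhistory=5)  # greedy SORT-style assignment, in-place

print(v.tracklist())  # tracks spawned and updated by assignment
```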
Ancestors
Static methods
def asjson(s)
-
Restore an object serialized with self.json(). Alias for Scene.from_json().
Usage:
vs = vipy.video.Scene.asjson(v.json())
@classmethod def asjson(cls, s): """Restore an object serialized with self.json(). Alias for `vipy.video.Scene.from_json`. Usage: ```python vs = vipy.video.Scene.asjson(v.json()) ``` """ return vipy.video.Scene.from_json(s)
def cast(v, flush=False)
-
Cast a conformal vipy object to this class. This is useful for downcast and upcast conversion of video objects.
@classmethod def cast(cls, v, flush=False): """Cast a conformal vipy object to this class. This is useful for downcast and upcast conversion of video objects.""" assert isinstance(v, vipy.video.Video), "Invalid input - must be derived from vipy.video.Video" if v.__class__ != vipy.video.Scene: v.__class__ = vipy.video.Scene v._tracks = {} if flush or not hasattr(v, '_tracks') else v._tracks v._activities = {} if flush or not hasattr(v, '_activities') else v._activities v._category = None if flush or not hasattr(v, '_category') else v._category return v
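A short example of casting, assuming a placeholder video path:

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4')  # placeholder path
s = vipy.video.Scene.cast(v)                         # upcast in place to a Scene
assert isinstance(s, vipy.video.Scene) and len(s.tracks()) == 0
```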
def from_json(s)
-
Restore an object serialized with self.json()
Usage:
vs = vipy.video.Scene.from_json(v.json())
@classmethod def from_json(cls, s): """Restore an object serialized with self.json() Usage: ```python vs = vipy.video.Scene.from_json(v.json()) ``` """ d = json.loads(s) if not isinstance(s, dict) else s d = {k.lstrip('_'):v for (k,v) in d.items()} # prettyjson (remove "_" prefix to attributes) v = super().from_json(s) # Packed attribute storage: # - When loading a large number of vipy objects, the python garbage collector slows down significantly due to reference cycle counting # - Mutable objects and custom containers are tracked by the garbage collector and the more of them that are loaded the longer GC takes # - To avoid this, load attributes as tuples of packed strings. This is an immutable type that is not reference counted. Check this with gc.is_tracked() # - Then, unpack the attributes on demand when accessing tracks() or activities(). Then, the nested containers are reference counted (even though they really should not since there are no cycles by construction) # - This is useful when calling vipy.util.load(...) on archives that contain hundreds of thousands of objects # - Do not access the private attributes self._tracks and self._attributes as they will be packed until needed # - Should install ultrajson (pip install ujson) for super fast parsing v._tracks = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['tracks'].values()]) # track ID key is embedded in object, legacy unpack of doubly JSON encoded strings (vipy-1.11.16) v._activities = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['activities'].values()]) # track ID key is embedded in object, legacy unpack of doubly JSON encoded strings (vipy-1.11.16) return v
Methods
def activities(self, activities=None, id=None)
-
Return mutable dictionary of activities. All temporal alignment is relative to the current clip().
def activities(self, activities=None, id=None): """Return mutable dictionary of activities. All temporal alignment is relative to the current clip().""" if isinstance(self._activities, tuple): self._activities = {a.id():a for a in [vipy.activity.Activity.from_json(json.loads(s)) for s in self._activities]} # on-demand if activities is None and id is None: return self._activities # mutable dict elif id is not None: return self._activities[id] elif isinstance(activities, dict): assert all([isinstance(a, vipy.activity.Activity) and k == a.id() for (k,a) in activities.items()]), "Invalid input - Must be dictionary of vipy.activity.Activity" self._activities = activities.copy() # shallow copy return self else: assert all([isinstance(a, vipy.activity.Activity) for a in tolist(activities)]), "Invalid input - Must be vipy.activity.Activity or list of vipy.activity.Activity" self._activities = {a.id():a for a in tolist(activities)} # insertion order preserved (python >=3.6) return self
def activity(self, id=None)
-
def activity(self, id=None): return self.activities(id=id) if id is not None else self.primary_activity()
def activity_categories(self)
-
Alias for activitylabels()
def activity_categories(self): """Alias for activitylabels()""" return self.activitylabels()
def activitybox(self, activityid=None, dilate=1.0)
-
The activitybox is the union of all activity bounding boxes in the video, which is the union of all tracks contributing to all activities. This is most useful after activityclip(). The activitybox is the smallest bounding box that contains all of the boxes from all of the tracks in all activities in this video.
def activitybox(self, activityid=None, dilate=1.0): """The activitybox is the union of all activity bounding boxes in the video, which is the union of all tracks contributing to all activities. This is most useful after activityclip(). The activitybox is the smallest bounding box that contains all of the boxes from all of the tracks in all activities in this video. """ activities = [a for (k,a) in self.activities().items() if (activityid is None or k in set(activityid))] boxes = [t.clone().boundingbox().dilate(dilate) for t in self.tracklist() if any([a.hastrack(t) for a in activities])] return boxes[0].union(boxes[1:]) if len(boxes) > 0 else vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(self.width()), height=int(self.height()))
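For example, assuming v is an annotated Scene restored from a hypothetical serialized scene file:

```python
import vipy.video

v = vipy.video.Scene.from_json(open('scene.json').read())  # hypothetical serialized scene
bb = v.activitybox(dilate=1.2)  # vipy.geometry.BoundingBox enclosing all activity tracks
print(int(bb.width()), int(bb.height()))
```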
def activityclip(self, padframes=0, multilabel=True, idx=None, padto=None, padtosec=None)
-
Return a list of
Scene
objects each clipped to be temporally centered on a single activity, with an optional padframes before and after.
Args
padframes
- [int] for symmetric padding same before and after
padframes
- [tuple] (int, int) for asymmetric padding before and after
padframes
- [list[tuples]] [(int, int), …] for activity specific asymmetric padding. See also padto.
multilabel
- [bool] include overlapping multilabel secondary activities in each activityclip
idx
- [int], [tuple], [list]. The indexes of the activities to return, where the index is the integer index order of the activity in the video. Useful for complex videos.
padto
- [int] padding so that each activity clip is at least padto frames long, with symmetric padding around the activity.
padtosec
- [float] padding so that each activity clip is at least padtosec seconds long, with symmetric padding around the activity.
Returns
A list of
Scene
each cloned from the source video and clipped on one activity in the scene
Notes
- The Scene() category is updated to be the activity category of the clip, and only the objects participating in the activity are included.
- Clips are returned ordered in the temporal order they appear in the video.
- The returned vipy.video.Scene() objects for each activityclip are clones of the video, with the video buffer flushed.
- Each activityclip() is associated with each activity in the scene, and includes all other secondary activities that the objects in the primary activity also perform (if multilabel=True). See activityclip().labels().
- Calling activityclip() on activityclip(multilabel=True) will duplicate activities, due to the overlapping secondary activities being included in each clip with an overlap. Be careful!
def activityclip(self, padframes=0, multilabel=True, idx=None, padto=None, padtosec=None): """Return a list of `vipy.video.Scene` objects each clipped to be temporally centered on a single activity, with an optional padframes before and after. Args: padframes: [int] for symmetric padding same before and after padframes: [tuple] (int, int) for asymmetric padding before and after padframes: [list[tuples]] [(int, int), ...] for activity specific asymmetric padding. See also padto. multilabel: [bool] include overlapping multilabel secondary activities in each activityclip idx: [int], [tuple], [list]. The indexes of the activities to return, where the index is the integer index order of the activity in the video. Useful for complex videos. padto: [int] padding so that each activity clip is at least padto frames long, with symmetric padding around the activity. padtosec: [float] padding so that each activity clip is at least padtosec seconds long, with symmetric padding around the activity. Returns: A list of `vipy.video.Scene` each cloned from the source video and clipped on one activity in the scene .. notes:: - The Scene() category is updated to be the activity category of the clip, and only the objects participating in the activity are included. - Clips are returned ordered in the temporal order they appear in the video. - The returned vipy.video.Scene() objects for each activityclip are clones of the video, with the video buffer flushed. - Each activityclip() is associated with each activity in the scene, and includes all other secondary activities that the objects in the primary activity also perform (if multilabel=True). See activityclip().labels(). - Calling activityclip() on activityclip(multilabel=True) will duplicate activities, due to the overlapping secondary activities being included in each clip with an overlap. Be careful! 
""" assert isinstance(padframes, int) or istuple(padframes) or islist(padframes) vid = self.clone(flushforward=True) if any([(a.endframe()-a.startframe()) <= 0 for a in vid.activities().values()]): warnings.warn('Filtering invalid activity clips with degenerate lengths: %s' % str([a for a in vid.activities().values() if (a.endframe()-a.startframe()) <= 0])) primary_activities = sorted([a.clone() for a in vid.activities().values() if (a.endframe()-a.startframe()) > 0], key=lambda a: a.startframe()) # only activities with at least one frame, sorted in temporal order padframelist = [padframes if istuple(padframes) else (padframes, padframes) for k in range(len(primary_activities))] if not islist(padframes) else padframes tracks = [ [t.clone() for (tid, t) in vid.tracks().items() if a.hastrackoverlap(t)] for a in primary_activities] # tracks associated with and temporally overlapping each primary activity (may be empty) secondary_activities = [[sa.clone() for sa in primary_activities if (sa.id() != pa.id() and pa.clone().temporalpad((prepad, postpad)).hasoverlap(sa) and (len(T)==0 or any([sa.hastrack(t) for t in T])))] for (pa, T, (prepad,postpad)) in zip(primary_activities, tracks, padframelist)] # overlapping secondary activities that includes any track in the primary activity secondary_activities = [sa if multilabel else [] for sa in secondary_activities] vid._activities = {} # for faster clone vid._tracks = {} # for faster clone maxframes = self.duration_in_frames() if (padframes != 0 or padto is not None or padtosec is not None) else None if padto is not None or padtosec is not None: cliplist = [(a.startframe(), a.endframe()) for a in primary_activities] padto = padto if padto is not None else int(round(padtosec*self.framerate())) padframelist = [(sp+int(np.ceil(((padto-(ef-sf))/2))), ep+int(np.ceil(((padto-(ef-sf))/2)))) if (ef-sf)<padto else (sp,ep) for ((sp,ep),(sf,ef)) in zip(padframelist, cliplist)] padframelist = [(0,ep+(-sp)) if (sp<0) else ((sp+(ep-(maxframes-ef)), maxframes-ef) if ((ef+ep)>maxframes) else (sp,ep)) for ((sp,ep),(sf,ef)) in zip(padframelist, cliplist)] # truncate to video boundary return [vid.clone() .activities([pa]+sa) # primary activity first .tracks(t) .clip(startframe=max(pa.startframe()-prepad, 0), endframe=min(pa.endframe()+postpad, (maxframes if maxframes is not None else pa.endframe()+postpad))) .category(pa.category()) .setactorid(pa.actorid()) # actor is actor of primary activity .setattribute('_instance_id', ('%s_%d' % (vid.videoid(), k)) if not vid.hasattribute('_instance_id') else vid.getattribute('_instance_id')) for (k,(pa,sa,t,(prepad,postpad))) in enumerate(zip(primary_activities, secondary_activities, tracks, padframelist)) if (idx is None or k in tolist(idx))]
def activitycuboid(self, activityid=None, dilate=1.0, maxdim=256, bb=None)
-
The activitycuboid() is the fixed square spatial crop of the video corresponding to the activitybox (or supplied bounding box), which contains all of the valid activities in the scene. This is most useful after activityclip(). The crop is resized so that the maximum dimension is maxdim, since the crop can be tiny and would otherwise not be encodable by ffmpeg.
def activitycuboid(self, activityid=None, dilate=1.0, maxdim=256, bb=None): """The activitycuboid() is the fixed square spatial crop corresponding to the activitybox (or supplied bounding box), which contains all of the valid activities in the scene. This is most useful after activityclip(). The activitycuboid() is a spatial crop of the video corresponding to the supplied boundingbox or the square activitybox(). This crop must be resized such that the maximum dimension is provided since the crop can be tiny and will not be encodable by ffmpeg """ bb = self.activitybox(activityid).maxsquare() if bb is None else bb assert bb is None or isinstance(bb, vipy.geometry.BoundingBox) assert bb.issquare(), "Add support for non-square boxes" return self.clone().crop(bb.dilate(dilate).int(), zeropad=True).resize(maxdim, maxdim) # crop triggers preview()
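For example, clipping to a single activity first and then cropping the fixed square cuboid (assuming a hypothetical serialized scene with at least one activity):

```python
import vipy.video

v = vipy.video.Scene.from_json(open('scene.json').read())        # hypothetical serialized scene
vc = v.activityclip()[0].activitycuboid(dilate=1.2, maxdim=256)  # fixed 256x256 square crop
```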
def activityfilter(self, f)
-
Apply boolean lambda function f to each activity and keep activity if function is true, remove activity if function is false
Filter out all activities longer than 128 frames
vid = vid.activityfilter(lambda a: len(a)<128)
Filter out activities with category in set
vid = vid.activityfilter(lambda a: a.category() in set(['category1', 'category2']))
Args
f
- [lambda] a lambda function that takes an activity and returns a boolean
Returns
This video with the activities f(a)==False removed.
def activityfilter(self, f): """Apply boolean lambda function f to each activity and keep activity if function is true, remove activity if function is false Filter out all activities longer than 128 frames ```python vid = vid.activityfilter(lambda a: len(a)<128) ``` Filter out activities with category in set ```python vid = vid.activityfilter(lambda a: a.category() in set(['category1', 'category2'])) ``` Args: f: [lambda] a lambda function that takes an activity and returns a boolean Returns: This video with the activities f(a)==False removed. """ assert callable(f) self._activities = {k:a for (k,a) in self.activities().items() if f(a) == True} return self
def activityindex(self, k)
-
Return the
Activity
at the requested index order in the video
def activityindex(self, k): """Return the `vipy.activity.Activity` at the requested index order in the video""" alist = self.activitylist() assert k >= 0 and k < len(alist), "Invalid index" return alist[k]
def activitylabel(self, startframe=None, endframe=None)
-
Return an iterator over activity labels in each frame, starting from startframe and ending when there are no more activities
def activitylabel(self, startframe=None, endframe=None): """Return an iterator over activity labels in each frame, starting from startframe and ending when there are no more activities""" endframe = endframe if endframe is not None else (max([a.endframe() for a in self.activitylist()]) if len(self.activities())>0 else 0) startframe = startframe if startframe is not None else (min([a.startframe() for a in self.activitylist()]) if len(self.activities())>0 else 0) assert startframe <= endframe for k in range(startframe, endframe): yield self.activitylabels(k)
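For example, to print the per-frame label sets (assuming a hypothetical serialized scene):

```python
import vipy.video

v = vipy.video.Scene.from_json(open('scene.json').read())  # hypothetical serialized scene
for (k, labels) in enumerate(v.activitylabel()):
    print(k, labels)  # the set of activity categories present at each frame
```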
def activitylabels(self, startframe=None, endframe=None)
-
Return a set of all activity categories in this scene, or at startframe, or in range [startframe, endframe]
def activitylabels(self, startframe=None, endframe=None): """Return a set of all activity categories in this scene, or at startframe, or in range [startframe, endframe]""" if startframe is None: return set([a.category() for a in self.activities().values()]) elif startframe is not None and endframe is None: return set([a.category() for a in self.activities().values() if a.during(startframe)]) elif startframe is not None and endframe is not None: return [set([a.category() for a in self.activities().values() if a.during(k)]) for k in range(startframe, endframe)] else: raise ValueError('Invalid input - must specify both startframe and endframe, or only startframe')
def activitylist(self)
-
Return a list of activities in the video, returned in insertion order.
Returns
A list of
Activity
insertion ordered into the original video
Note: The order of the activitylist() will not match the order of activityclip(), which is sorted by activity startframe. To match, use sorted(activitylist, key=lambda a: a.startframe())
def activitylist(self): """Return a list of activities in the video, returned in insertion order. Returns: A list of `vipy.activity.Activity` insertion ordered into the original video .. note:: The order of the activitylist() will not match the order of activityclip(), which is sorted by activity startframe. To match, use sorted(activitylist, key=lambda a: a.startframe()) """ return list(self.activities().values()) # insertion ordered (python >=3.6), triggers shallow copy
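Following the note above, a sketch of matching activitylist() ordering to activityclip() ordering (assuming a hypothetical serialized scene):

```python
import vipy.video

v = vipy.video.Scene.from_json(open('scene.json').read())       # hypothetical serialized scene
alist = sorted(v.activitylist(), key=lambda a: a.startframe())  # temporal order, matching activityclip()
clips = v.activityclip()                                        # clips[k] corresponds to alist[k]
```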
def activitymap(self, f)
-
Apply lambda function f to each activity
def activitymap(self, f): """Apply lambda function f to each activity""" assert callable(f) self._activities = {k:f(a) for (k,a) in self.activities().items()} assert all([isinstance(a, vipy.activity.Activity) for a in self.activitylist()]), "Lambda function must return vipy.activity.Activity()" return self
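For example, to pad every activity by ten frames on each side, using temporalpad() the same way the activityclip() source above does (hypothetical serialized scene):

```python
import vipy.video

v = vipy.video.Scene.from_json(open('scene.json').read())  # hypothetical serialized scene
v.activitymap(lambda a: a.temporalpad((10, 10)))           # dilate each activity 10 frames before and after
```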
def activitysplit(self, idx=None)
-
Split the scene into k separate scenes, one for each activity. Do not include overlapping activities.
Args
idx
- [int],[tuple],[list]. Return only those activities in the provided activity index list, where the activity index is the integer index of the activity in the video.
Note: This is useful for union()
def activitysplit(self, idx=None): """Split the scene into k separate scenes, one for each activity. Do not include overlapping activities. Args: idx: [int],[tuple],[list]. Return only those activities in the provided activity index list, where the activity index is the integer index of the activity in the video. .. note:: This is useful for union() """ vid = self.clone(flushforward=True) if any([(a.endframe()-a.startframe()) <= 0 for a in vid.activities().values()]): warnings.warn('Filtering invalid activity with degenerate lengths: %s' % str([a for a in vid.activities().values() if (a.endframe()-a.startframe()) <= 0])) activities = sorted([a.clone() for a in vid.activities().values() if (a.endframe()-a.startframe()) > 0], key=lambda a: a.startframe()) # only activities with at least one frame, sorted in temporal order tracks = [ [t.clone() for (tid, t) in vid.tracks().items() if a.hastrack(t)] for a in activities] # tracks associated with each activity (may be empty) vid._activities = {} # for faster clone vid._tracks = {} # for faster clone return [vid.clone() .setattribute('_instance_id', ('%s_%d' % (vid.videoid(), k)) if not vid.hasattribute('_instance_id') else vid.getattribute('_instance_id')) .activities(pa) .tracks(t) .setactorid(pa.actorid()) for (k,(pa,t)) in enumerate(zip(activities, tracks)) if idx is None or k in tolist(idx)]
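A short sketch, assuming a hypothetical serialized scene:

```python
import vipy.video

v = vipy.video.Scene.from_json(open('scene.json').read())  # hypothetical serialized scene
scenes = v.activitysplit()         # one Scene per activity, with only its associated tracks
first = v.activitysplit(idx=0)[0]  # only the first activity in temporal order
```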
def activitysquare(self, activityid=None, dilate=1.0, maxdim=256)
-
The activity square is the maxsquare activitybox that contains only valid (non-padded) pixels interior to the image
def activitysquare(self, activityid=None, dilate=1.0, maxdim=256): """The activity square is the maxsquare activitybox that contains only valid (non-padded) pixels interior to the image""" bb = self.activitybox(activityid).maxsquare().dilate(dilate).int().iminterior(self.width(), self.height()).minsquare() return self.activitycuboid(activityid, dilate=1.0, maxdim=maxdim, bb=bb)
def activitytube(self, activityid=None, dilate=1.0, maxdim=256)
-
The activitytube() is a sequence of crops where the spatial box changes on every frame to track the activity.
The box in each frame is the square activitybox() for this video which is the union of boxes contributing to this activity in each frame. This function does not perform any temporal clipping. Use activityclip() first to split into individual activities.
Crops will be optionally dilated, with zeropadding if the box is outside the image rectangle. All crops will be resized so that the maximum dimension is maxdim (and square by default).
def activitytube(self, activityid=None, dilate=1.0, maxdim=256): """The activitytube() is a sequence of crops where the spatial box changes on every frame to track the activity. The box in each frame is the square activitybox() for this video which is the union of boxes contributing to this activity in each frame. This function does not perform any temporal clipping. Use activityclip() first to split into individual activities. Crops will be optionally dilated, with zeropadding if the box is outside the image rectangle. All crops will be resized so that the maximum dimension is maxdim (and square by default) """ vid = self.clone().load() # triggers load self.activityfilter(lambda a: activityid is None or a.id() in set(activityid)) # only requested IDs (or all of them) frames = [im.padcrop(im.boundingbox().maxsquare().dilate(dilate).int()).resize(maxdim, maxdim) for im in vid if im.boundingbox() is not None] # track interpolation, for frames with boxes only if len(frames) != len(vid): warnings.warn('[vipy.video.activitytube]: Removed %d frames with no spatial bounding boxes' % (len(vid) - len(frames))) vid.attributes['__activitytube'] = {'truncated':len(vid) - len(frames)} # provenance to reject if len(frames) == 0: warnings.warn('[vipy.video.activitytube]: Resulting video is empty! Setting activitytube to zero') frames = [ vid[0].resize(maxdim, maxdim).zeros() ] # empty frame vid.attributes['__activitytube'] = {'empty':True} # provenance to reject vid._tracks = {ti:vipy.object.Track(keyframes=[f for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], boxes=[d for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], category=t.category(), trackid=ti, framerate=self.framerate()) for (k,(ti,t)) in enumerate(self.tracks().items())} # replace tracks with boxes relative to tube return vid.array(np.stack([im.numpy() for im in frames]))
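Per the docstring, clip to a single activity before extracting the tube; a sketch assuming a hypothetical serialized scene:

```python
import vipy.video

v = vipy.video.Scene.from_json(open('scene.json').read())  # hypothetical serialized scene
tube = v.activityclip()[0].activitytube(maxdim=256)        # clip to one activity, then crop per-frame boxes
```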
def actor(self)
-
Return the primary actor (first
Track
) in the video
def actor(self): """Return the primary actor (first `vipy.object.Track`) in the video""" return next(iter(self.tracks().values())) if len(self._tracks)>0 else None # Python >=3.6
def actorid(self, id=None, fluent=False)
-
Return or set the actor ID for the video.
- The actor ID is the track ID of the primary actor in the scene. This is useful for assigning a role for activities that are performed by the actor.
- The actor ID is the first track in the tracklist
Args
id
- [str] if not None, then use this track ID as the actor
fluent
- [bool] If true, always return self. This is useful for those cases where the actorid being set is None.
Returns
[id=None, fluent=False]
- the actor ID
[id is not None]
- The video with the actor ID set, only if the ID is found in the tracklist
Note: Not to be confused with the biometric subject id. For videos collected with the Visym Collector platform (https://visym.com/collector), the biometric subject ID can be retrieved via Video.metadata() (e.g. self.metadata()['subject_ids']).
def actorid(self, id=None, fluent=False): """Return or set the actor ID for the video. - The actor ID is the track ID of the primary actor in the scene. This is useful for assigning a role for activities that are performed by the actor. - The actor ID is the first track in the tracklist Args: id: [str] if not None, then use this track ID as the actor fluent: [bool] If true, always return self. This is useful for those cases where the actorid being set is None. Returns: [id=None, fluent=False] the actor ID [id is not None] The video with the actor ID set, only if the ID is found in the tracklist .. note:: Not to be confused with biometric subject id. For videos collected with Visym Collector platform (https://visym.com/collector), the biometric subject ID can be retrieved via `vipy.video.Video.metadata` (e.g. self.metadata()['subject_ids']). """ if id is None: return (next(iter(self.tracks().keys())) if len(self._tracks)>0 else None) if not fluent else self # Python >=3.6 elif id in self._tracks: # Reorder tracks so that id is first idlist = [id] + [ti for ti in self.tracks().keys() if ti != id] self._tracks = {k:self.track(k) for k in idlist} else: warnings.warn('trackid=%s not found in "%s"' % (str(id), str(self))) return self
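A short sketch of the getter and setter forms, assuming `v` is a `vipy.video.Scene` with at least one track:

```python
# Assumes: v is a vipy.video.Scene with tracks
aid = v.actorid()            # track ID of the primary (first) track, or None if no tracks
v.actorid(aid, fluent=True)  # setter form: reorders the tracklist so this ID is first, returns self
print(v.actor())             # the primary vipy.object.Track
```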
def actortube(self, trackid=None, dilate=1.0, maxdim=256, strict=True)
-
The actortube() is a sequence of crops where the spatial box changes on every frame to track the primary actor performing an activity.
The box in each frame is the square box centered on the primary actor performing the activity, dilated by a given factor (the original box around the actor is unchanged, this just increases the context, with zero padding). This function does not perform any temporal clipping. Use activityclip() first to split into individual activities.
All crops will be resized so that the maximum dimension is maxdim (and square by default).
def actortube(self, trackid=None, dilate=1.0, maxdim=256, strict=True): """The actortube() is a sequence of crops where the spatial box changes on every frame to track the primary actor performing an activity. The box in each frame is the square box centered on the primary actor performing the activity, dilated by a given factor (the original box around the actor is unchanged, this just increases the context, with zero padding) This function does not perform any temporal clipping. Use activityclip() first to split into individual activities. All crops will be resized so that the maximum dimension is maxdim (and square by default) """ assert trackid is not None or len(self.tracks()) == 1, "Track ID must be provided if there exists more than one track in the scene" trackid = trackid if trackid is not None else list(self.tracks().keys())[0] assert self.hastrack(trackid), "Track ID %s not found - Actortube requires a track ID in the scene (tracks=%s)" % (str(trackid), str(self.tracks())) vid = self.clone().load() # triggers load t = vid.tracks(id=trackid) # actor track frames = [im.padcrop(t[k].maxsquare().dilate(dilate).int()).resize(maxdim, maxdim) for (k,im) in enumerate(vid) if t.during(k)] if len(t)>0 else [] # actor interpolation, padding may introduce frames with no tracks if len(frames) == 0: if not strict: warnings.warn('[vipy.video.actortube]: Empty track for trackid="%s" - Setting actortube to zero' % trackid) frames = [ vid[0].resize(maxdim, maxdim).zeros() ] # empty frame vid.attributes['__actortube'] = {'empty':True} # provenance to reject else: raise ValueError('[vipy.video.actortube]: Empty track for track=%s, trackid=%s' % (str(t), trackid)) vid._tracks = {ti:vipy.object.Track(keyframes=[f for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], # keyframes zero indexed, relative to [frames] boxes=[d for (f,im) in enumerate(frames) for d in im.objects() if d.attributes['trackid'] == ti], # one box per frame category=t.category(), trackid=ti, framerate=self.framerate()) # preserve trackid for (k,(ti,t)) in enumerate(self.tracks().items())} # replace tracks with interpolated boxes relative to tube defined by actor return vid.array(np.stack([im.numpy() for im in frames]))
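A minimal sketch combining activityclip() with actortube(), with strict=False so an empty actor track degrades to a zero frame instead of raising; `v` is an assumed annotated `vipy.video.Scene`:

```python
# Assumes: v is an annotated vipy.video.Scene
for c in v.activityclip():
    tube = c.actortube(trackid=c.actorid(), dilate=1.5, maxdim=256, strict=False)
    if tube.attributes.get('__actortube', {}).get('empty', False):
        continue  # provenance set when the actor track had no interpolatable frames
```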
def add(self, obj, category=None, attributes=None, rangecheck=True, frame=None, fluent=False)
-
Add the object obj to the scene, and return an index to this object for future updates
This function is used to incrementally build up a scene frame by frame. Obj can be one of the following types:
- obj = vipy.object.Detection(), this must be called from within a frame iterator (e.g. for im in video) to get the current frame index
- obj = vipy.object.Track()
- obj = vipy.activity.Activity()
- obj = [xmin, ymin, width, height], with associated category kwarg, this must be called from within a frame iterator to get the current frame index
It is recommended that the objects are added as follows. For a v=vipy.video.Scene():
for im in v:
    # Do some processing on frame im to detect objects
    (object_labels, xywh) = object_detection(im)
    # Add them to the scene, note that each object instance is independent in each frame, use tracks for object correspondence
    for (lbl,bb) in zip(object_labels, xywh):
        v.add(bb, lbl)
    # Do some correspondences to track objects
    t2 = v.add( vipy.object.Track(...) )
    # Update a previous track to add a keyframe
    v.track(t2).add( ... )
The frame iterator will keep track of the current frame in the video and add the objects in the appropriate place. Alternatively,
v.add(vipy.object.Track(..), frame=k)
Args
obj
- A conformal python object to add to the scene (Detection, Track, Activity, [xmin, ymin, width, height])
category
- Used if obj is an xywh tuple
attributes
- Used only if obj is an xywh tuple
frame
- [int] The frame to add the object
rangecheck
- [bool] If true, check if the object is within the image rectangle and throw an exception if not.
This requires introspecting the video shape using Video.shape().
fluent
- [bool] If true, return self instead of the object index
def add(self, obj, category=None, attributes=None, rangecheck=True, frame=None, fluent=False): """Add the object obj to the scene, and return an index to this object for future updates This function is used to incrementally build up a scene frame by frame. Obj can be one of the following types: - obj = vipy.object.Detection(), this must be called from within a frame iterator (e.g. for im in video) to get the current frame index - obj = vipy.object.Track() - obj = vipy.activity.Activity() - obj = [xmin, ymin, width, height], with associated category kwarg, this must be called from within a frame iterator to get the current frame index It is recommended that the objects are added as follows. For a v=vipy.video.Scene(): ```python for im in v: # Do some processing on frame im to detect objects (object_labels, xywh) = object_detection(im) # Add them to the scene, note that each object instance is independent in each frame, use tracks for object correspondence for (lbl,bb) in zip(object_labels, xywh): v.add(bb, lbl) # Do some correspondences to track objects t2 = v.add( vipy.object.Track(...) ) # Update a previous track to add a keyframe v.track(t2).add( ... ) ``` The frame iterator will keep track of the current frame in the video and add the objects in the appropriate place. Alternatively, ```python v.add(vipy.object.Track(..), frame=k) ``` Args: obj: A conformal python object to add to the scene (`vipy.object.Detection`, `vipy.object.Track`, `vipy.activity.Activity`, [xmin, ymin, width, height]) category: Used if obj is an xywh tuple attributes: Used only if obj is an xywh tuple frame: [int] The frame to add the object rangecheck: [bool] If true, check if the object is within the image rectangle and throw an exception if not. This requires introspecting the video shape using `vipy.video.Video.shape`. fluent: [bool] If true, return self instead of the object index """ if isinstance(obj, vipy.object.Detection): assert frame is not None, "add() for vipy.object.Detection() must be added during frame iteration (e.g. for im in video: )" k = frame if obj.hasattribute('trackid') and obj.attributes['trackid'] in self.tracks(): # The attribute "trackid" is set for a detection when interpolating a track at a frame. This is useful for reconstructing a track from previously enumerated detections trackid = obj.attributes['trackid'] self.trackmap(lambda t: t.update(k, obj) if obj.attributes['trackid'] == t.id() else t) return None if not fluent else self else: t = vipy.object.Track(category=obj.category(), keyframes=[k], boxes=[obj], boundary='strict', attributes=obj.attributes, trackid=obj.attributes['trackid'] if obj.hasattribute('trackid') else None, framerate=self.framerate()) if rangecheck and not obj.hasoverlap(width=self.width(), height=self.height()): raise ValueError("Track '%s' does not intersect with frame shape (%d, %d)" % (str(t), self.height(), self.width())) self.tracks()[t.id()] = t # by-reference return t.id() if not fluent else self elif isinstance(obj, vipy.object.Track): if rangecheck and not obj.boundingbox().isinside(vipy.geometry.imagebox(self.shape())): obj = obj.imclip(self.width(), self.height()) # try to clip it, will throw exception if all are bad warnings.warn('[vipy.video.add]: Clipping trackid=%s track="%s" to image rectangle' % (str(obj.id()), str(obj))) if obj.framerate() != self.framerate(): obj.framerate(self.framerate()) # convert framerate of track to framerate of video self.tracks()[obj.id()] = obj # by-reference return obj.id() if not fluent else self elif isinstance(obj, vipy.activity.Activity): if rangecheck and obj.startframe() >= obj.endframe(): raise ValueError("Activity '%s' has invalid (startframe, endframe)=(%d, %d)" % (str(obj), obj.startframe(), obj.endframe())) self.activities()[obj.id()] = obj # by-reference, activity may have no tracks return obj.id() if not fluent else self elif (istuple(obj) or islist(obj)) and len(obj) == 4 and isnumber(obj[0]): assert frame is not None, "add() for obj=xywh must be added at a specific frame" t = vipy.object.Track(category=category, keyframes=[frame], boxes=[vipy.geometry.BoundingBox(xywh=obj)], boundary='strict', attributes=attributes, framerate=self.framerate()) if rangecheck and not t.boundingbox().isinside(vipy.geometry.imagebox(self.shape())): t = t.imclip(self.width(), self.height()) # try to clip it, will throw exception if all are bad warnings.warn('Clipping track "%s" to image rectangle' % (str(t))) self.tracks()[t.id()] = t # by-reference return t.id() if not fluent else self else: raise ValueError('Undefined object type "%s" to be added to scene - Supported types are obj in ["vipy.object.Detection", "vipy.object.Track", "vipy.activity.Activity", "[xmin, ymin, width, height]"]' % str(type(obj)))
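A sketch of the xywh form with an explicit frame index, which avoids needing the frame iterator; `detect` is a hypothetical per-frame detector, not part of vipy:

```python
# Assumes: v is a vipy.video.Scene and detect() is a hypothetical detector
# returning [(category, [xmin, ymin, width, height]), ...] per frame
for (k, im) in enumerate(v):
    for (label, xywh) in detect(im):
        v.add(xywh, category=label, frame=k)  # one single-keyframe track per detection
```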
def addframe(self, im, frame)
-
Add im=vipy.image.Scene() into vipy.video.Scene() at given frame. The input image must have been generated using im=self[k] for this to be meaningful, so that trackid can be associated
def addframe(self, im, frame): """Add im=vipy.image.Scene() into vipy.video.Scene() at given frame. The input image must have been generated using im=self[k] for this to be meaningful, so that trackid can be associated""" assert isinstance(im, vipy.image.Scene), "Invalid input - Must be vipy.image.Scene()" assert im.shape() == self.shape(), "Frame input (shape=%s) must be same shape as video (shape=%s)" % (str(im.shape()), str(self.shape())) # Copy framewise vipy.image.Scene() into vipy.video.Scene(). self.numpy()[frame] = im.array() # will trigger copy for bb in im.objects(): self.trackmap(lambda t: t.update(frame, bb) if bb.attributes['trackid'] == t.id() else t) return self
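A sketch of the intended round trip, where a frame is pulled from the video, edited, and written back; the box edit shown is illustrative only:

```python
# Assumes: v is a loaded vipy.video.Scene with tracks
k = 0
im = v.frame(k)                  # vipy.image.Scene whose objects carry 'trackid' attributes
im.objects()[0].translate(5, 0)  # illustrative edit; translate() is a vipy.geometry.BoundingBox method
v.addframe(im, k)                # propagates the edited boxes back into the matching tracks at frame k
```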
def annotate(self, outfile=None, fontsize=10, captionoffset=(0, 0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person': 'green', 'Vehicle': 'blue', 'Object': 'red'}, categories=None, nocaption=False, nocaption_withstring=[], mutator=None, timestamp=None, timestampcolor='black', timestampfacecolor='white', verbose=False)
-
Generate a video visualization of all annotated objects and activities in the video.
The annotation video will be at the resolution and framerate of the underlying video, and pixels in this video will now contain the overlay. This function does not play the video, it only generates annotated video frames. Use show(), which is equivalent to annotate().saveas().play().
Args
outfile
- [str] An optional file to stream the annotation to without storing the annotated video in memory
fontsize
- [int] The fontsize of bounding box captions, used by matplotlib
captionoffset
- (tuple) The (x,y) offset relative to the bounding box to place the caption for each box.
textfacecolor
- [str] The color of the text in the bounding box caption. Must be in colorlist().
textfacealpha
- [float] The transparency of the text in the bounding box caption. Must be in [0,1], where 0=transparent and 1=opaque.
shortlabel
- [bool] If true, display the shortlabel for each object in the scene, otherwise show the full category
boxalpha
- [float] The transparency of the box face behind the text. Must be in [0,1], where 0=transparent and 1=opaque.
d_category2color
- [dict] A dictionary mapping categories of objects in the scene to their box colors. Named colors must be in colorlist().
categories
- [list] Only show these categories, or show them all if None
nocaption_withstring
- [list] Do not show captions for those detection categories (or shortlabels) containing any of the strings in the provided list
nocaption
- [bool] If true, do not show any captions, just boxes
mutator
- [lambda] A lambda function that will mutate an image to allow for complex visualizations. This should be a mutator like mutator_show_trackid().
timestamp
- [bool] If true, show a semitransparent timestamp (when the annotation occurs, not when the video was collected) with frame number in the upper left corner of the video
timestampcolor
- [str] The color of the timestamp text. Named colors must be in colorlist().
timestampfacecolor
- [str] The color of the timestamp background. Named colors must be in colorlist().
verbose
- [bool] Show more helpful messages if true
Returns
A Video with annotations in the pixels. If outfile is provided, then the returned video will be flushed.
Note: In general, this function should not be run on very long videos without the outfile kwarg, as it requires loading the video framewise into memory.
def annotate(self, outfile=None, fontsize=10, captionoffset=(0,0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, categories=None, nocaption=False, nocaption_withstring=[], mutator=None, timestamp=None, timestampcolor='black', timestampfacecolor='white', verbose=False): """Generate a video visualization of all annotated objects and activities in the video. The annotation video will be at the resolution and framerate of the underlying video, and pixels in this video will now contain the overlay. This function does not play the video, it only generates annotated video frames. Use show() which is equivalent to annotate().saveas().play() Args: outfile: [str] An optional file to stream the annotation to without storing the annotated video in memory fontsize: [int] The fontsize of bounding box captions, used by matplotlib captionoffset: (tuple) The (x,y) offset relative to the bounding box to place the caption for each box. textfacecolor: [str] The color of the text in the bounding box caption. Must be in `vipy.gui.using_matplotlib.colorlist`. textfacealpha: [float] The transparency of the text in the bounding box caption. Must be in [0,1], where 0=transparent and 1=opaque. shortlabel: [bool] If true, display the shortlabel for each object in the scene, otherwise show the full category boxalpha: [float] The transparency of the box face behind the text. Must be in [0,1], where 0=transparent and 1=opaque. d_category2color: [dict] A dictionary mapping categories of objects in the scene to their box colors. Named colors must be in `vipy.gui.using_matplotlib.colorlist`. categories: [list] Only show these categories, or show them all if None nocaption_withstring: [list]: Do not show captions for those detection categories (or shortlabels) containing any of the strings in the provided list nocaption: [bool] If true, do not show any captions, just boxes mutator: [lambda] A lambda function that will mutate an image to allow for complex visualizations. This should be a mutator like `vipy.image.mutator_show_trackid`. timestamp: [bool] If true, show a semitransparent timestamp (when the annotation occurs, not when the video was collected) with frame number in the upper left corner of the video timestampcolor: [str] The color of the timestamp text. Named colors must be in `vipy.gui.using_matplotlib.colorlist`. timestampfacecolor: [str] The color of the timestamp background. Named colors must be in `vipy.gui.using_matplotlib.colorlist`. verbose: [bool] Show more helpful messages if true Returns: A `vipy.video.Video` with annotations in the pixels. If outfile is provided, then the returned video will be flushed. .. note:: In general, this function should not be run on very long videos without the outfile kwarg, as it requires loading the video framewise into memory. """ assert outfile is None or vipy.util.isvideofile(outfile), "Invalid filename extension for annotated video" if verbose: print('[vipy.video.annotate]: Annotating video ...') f_mutator = mutator if mutator is not None else vipy.image.mutator_show_jointlabel() f_timestamp = (lambda k: '%s %d' % (vipy.util.clockstamp(), k)) if timestamp is True else timestamp if outfile is None: assert self.load().isloaded(), "Load() failed" imgs = [f_mutator(self[k].clone(), k).savefig(fontsize=fontsize, captionoffset=captionoffset, textfacecolor=textfacecolor, textfacealpha=textfacealpha, shortlabel=shortlabel, boxalpha=boxalpha, d_category2color=d_category2color, categories=categories, nocaption=nocaption, timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestamp=f_timestamp(k) if timestamp is not None else None, figure=1 if k<(len(self)-1) else None, # cleanup on last frame nocaption_withstring=nocaption_withstring).numpy() for k in range(0, len(self))] # Replace pixels with annotated pixels and downcast object to vipy.video.Video (since there are no more objects to show) return vipy.video.Video(array=np.stack([np.array(PIL.Image.fromarray(img).convert('RGB')) for img in imgs], axis=0), framerate=self.framerate(), attributes=self.attributes) # slow for large videos else: # Stream to output video without loading all frames into memory n = self.duration_in_frames_of_videofile() if not self.isloaded() else len(self) vo = vipy.video.Video(filename=outfile, framerate=self.framerate()) with vo.stream(overwrite=True) as so: for (k,im) in enumerate(self.stream()): so.write(f_mutator(im.clone(), k).savefig(fontsize=fontsize, captionoffset=captionoffset, textfacecolor=textfacecolor, textfacealpha=textfacealpha, shortlabel=shortlabel, boxalpha=boxalpha, d_category2color=d_category2color, categories=categories, nocaption=nocaption, timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestamp=f_timestamp(k) if timestamp is not None else None, figure=1 if k<(n-1) else None, # cleanup on last frame nocaption_withstring=nocaption_withstring).rgb()) return vo
def annotation(self)
-
Return an iterator over annotations in each frame.
for y in self.annotation():
    for (bb,a) in y:
        print((bb,a))
Yields
for each frame, yield the tuple ( (Detection, (tuple of Activity performed by the actor in this bounding box)), ... )
Note: The preferred method for accessing annotations is a frame iterator, which includes pixels. However, this method provides access to just the annotations without pixels.
def annotation(self): """Return an iterator over annotations in each frame. ```python for y in self.annotation(): for (bb,a) in y: print((bb,a)) ``` Yields: for each frame yield the tuple: ( (`vipy.object.Detection`, (tuple of `vipy.activity.Activity` performed by the actor in this bounding box)), ... ) .. note:: The preferred method for accessing annotations is a frame iterator, which includes pixels. However, this method provides access to just the annotations without pixels. """ endframe = max([a.endframe() for a in self.activitylist()]+[t.endframe() for (tk,t) in self.tracks().items()]) if (len(self._tracks) > 0 or len(self._activities) > 0) else 0 for k in range(0,endframe): yield tuple( [tuple( [t[k] if t.during(k) else None, tuple( [a for a in self.activitylist() if a.during(k) and a.hastrackoverlap(t)] ) ]) for t in self.tracklist()])
def asfloatmask(self, fg=1.0, bg=0.0)
-
Replace all pixels in foreground boxes with fg, and bg in background, return a copy
def asfloatmask(self, fg=1.0, bg=0.0): """Replace all pixels in foreground boxes with fg, and bg in background, return a copy""" assert self.isloaded() self.numpy() # convert to writeable numpy array, triggers writeable copy array = np.full( (len(self.load()), self.height(), self.width(), 1), dtype=np.float32, fill_value=bg) for (k,im) in enumerate(self): for bb in im.objects(): if bb.hasintersection(im.imagebox()): array[k, int(round(bb._ymin)):int(round(bb._ymax)), int(round(bb._xmin)):int(round(bb._xmax))] = fg # does not need imclip return vipy.video.Video(array=array, framerate=self.framerate(), colorspace='float')
def assign(self, frame, dets, minconf=0.2, maxhistory=5, activityiou=0.5, trackcover=0.2, trackconfsamples=4, gate=0, activitymerge=True, activitynms=False)
-
Assign a list of Detection object detections and Activity activity detections at frame k to scene tracks and activities by greedy assignment. In-place update.
Approach
- This approach is equivalent to greedy, constant velocity SORT tracking (https://arxiv.org/abs/1602.00763)
- Individual detections are assigned to tracks using a greedy velocity-only track propagation, sorted by BoundingBox.maxcover() and detection confidence within a spatial tracking gate
- New tracks are created if the detection is unassigned and above a minimum confidence
- Updated tracks resulting from assignment are stored in Scene.tracks()
Args
frame
- [int] The frame index to assign the detections into the scene
dets
- [list] A list of Detection or Activity objects as returned from a detector
activityiou
- [float] the minimum temporal IOU for activity assignment
minconf
- [float] the minimum confidence for a detection to be considered as a new track
maxhistory
- [int] the maximum propagation length of a track with no measurements, the frame history used for velocity estimates
trackconfsamples
- [int] the number of uniformly spaced samples along a track to compute a mean track confidence
gate
- [int] the gating distance in pixels used for assignment of fast moving detections. Useful for low detection framerates if a detection does not overlap with the track.
trackcover
- [float] the minimum cover necessary for assignment of a detection to a track
activitymerge
- [bool] if true, then merge overlapping activity detections of the same track and category, otherwise each activity detection is added as a new detection
activitynms
- [bool] if true, then perform non-maximum suppression of activity detections of the same actor and category that overlap more than activityiou
Returns
This video object with each det assigned to its corresponding track or activity.
def assign(self, frame, dets, minconf=0.2, maxhistory=5, activityiou=0.5, trackcover=0.2, trackconfsamples=4, gate=0, activitymerge=True, activitynms=False): """Assign a list of `vipy.object.Detection` object detections and `vipy.activity.Activity` activity detections at frame k to scene tracks and activities by greedy assignment. In-place update. Approach: - This approach is equivalent to greedy, constant velocity SORT tracking (https://arxiv.org/abs/1602.00763) - Individual detections are assigned to tracks using a greedy velocity only track propagation, sorted by `vipy.geometry.BoundingBox.maxcover` and detection confidence within a spatial tracking gate - New tracks are created if the detection is unassigned and above a minimum confidence - Updated tracks resulting from assignment are stored in `vipy.video.Scene.tracks` Args: frame: [int] The frame index to assign the detections into the scene dets: [list] A list of `vipy.object.Detection` or `vipy.activity.Activity` objects as returned from a detector activityiou: [float] the minimum temporal IOU for activity assignment minconf: [float] the minimum confidence for a detection to be considered as a new track maxhistory: [int] the maximum propagation length of a track with no measurements, the frame history used for velocity estimates trackconfsamples: [int] the number of uniformly spaced samples along a track to compute a mean track confidence gate: [int] the gating distance in pixels used for assignment of fast moving detections. Useful for low detection framerates if a detection does not overlap with the track. trackcover: [float] the minimum cover necessary for assignment of a detection to a track activitymerge: [bool] if true, then merge overlapping activity detections of the same track and category, otherwise each activity detection is added as a new detection activitynms: [bool] if true, then perform non-maximum suppression of activity detections of the same actor and category that overlap more than activityiou Returns: This video object with each det assigned to corresponding track or activity. """ assert dets is None or all([isinstance(d, vipy.object.Detection) or isinstance(d, vipy.activity.Activity) for d in tolist(dets)]), "invalid input" assert frame >= 0 and minconf >= 0 and minconf <= 1.0 and maxhistory > 0, "invalid input" if dets is None or len(tolist(dets)) == 0: return self dets = tolist(dets) if any([d.confidence() is None for d in dets]): warnings.warn('Removing %d detections with no confidence' % len([d for d in dets if d.confidence() is None])) dets = [d for d in dets if d.confidence() is not None] objdets = [d for d in dets if isinstance(d, vipy.object.Detection)] activitydets = [d for d in dets if isinstance(d, vipy.activity.Activity)] # Object detection to track assignment if len(objdets) > 0: # Track propagation: Constant velocity motion model for active tracks t_ref = [(t, t.linear_extrapolation(frame, dt=maxhistory, shape=False)) for (k,t) in self.tracks().items() if ((frame - t.endframe()) <= maxhistory)] trackarea = [ti.area() for (t,ti) in t_ref] detarea = [d.area() for d in objdets] # Track assignment: # - Each track is assigned at most one detection # - Each detection is assigned to at most one track. # - Assignment is the highest confidence maximum overlapping detection by cover within tracking gate trackconf = {t.id():t.confidence(samples=trackconfsamples) for (t, ti) in t_ref} assignments = [(t, d.confidence(), d.iou(ti, area=detarea[j], otherarea=trackarea[i]), d.shapeiou(ti, area=detarea[j], otherarea=trackarea[i]), d.maxcover(ti, area=detarea[j], otherarea=trackarea[i]), d) for (i, (t, ti)) in enumerate(t_ref) for (j,d) in enumerate(objdets) if (t.category() == d.category() and (((ti._xmax if ti._xmax < d._xmax else d._xmax) - (ti._xmin if ti._xmin > d._xmin else d._xmin)) > 0 and ((ti._ymax if ti._ymax < d._ymax else d._ymax) - (ti._ymin if ti._ymin > d._ymin else d._ymin)) > 0))] assigned = set([]) posconf = min([d.confidence() for d in objdets]) if len(objdets)>0 else 0 assignments.sort(key=lambda x: (x[1]+posconf)*(x[2]+x[3]+x[4])+trackconf[x[0].id()], reverse=True) # in-place for (t, conf, iou, shapeiou, cover, d) in assignments: if cover > (trackcover if len(t)>1 else 0): # the highest confidence detection within the assignment gate (or any overlap if not yet enough history for velocity estimate) if (t.id() not in assigned and d.id() not in assigned): # not assigned yet, assign it! self.track(t.id()).update(frame, d.clone()) # track assignment! (clone required) assigned.add(t.id()) # cannot assign again to this track assigned.add(d.id()) # mark detection as assigned # Track spawn from unassigned and unexplained detections for (j,d) in enumerate(objdets): if (d.id() not in assigned): if (d.confidence() >= minconf and not any([t.linear_extrapolation(frame, dt=maxhistory, shape=False).maxcover(d, otherarea=detarea[j]) >= 0.7 for (i,(t,ti)) in enumerate(t_ref) if t.category() == d.category()])): gated = [(t, t.linear_extrapolation(frame, dt=maxhistory, shape=False)) for (t,ti) in t_ref if (t.id() not in assigned and t.category() == d.category())] if gate>0 else [] gated = sorted([(t, ti) for (t, ti) in gated if ti.hasintersection(d, gate=gate)], key=lambda x: d.sqdist(x[1])) if len(gated) > 0: self.track(gated[0][0].id()).update(frame, d.clone()) # track assignment! (clone required) assigned.add(gated[0][0].id()) assigned.add(d.id()) else: assigned.add(self.add(vipy.object.Track(keyframes=[frame], boxes=[d.clone()], category=d.category(), framerate=self.framerate()), rangecheck=False)) # clone required assigned.add(d.id()) # Activity assignment if len(activitydets) > 0: assert all([d.actorid() in self.tracks() for d in activitydets]), "Invalid activity" assigned = set([]) if activitymerge: minframe = min([a._startframe for a in activitydets]) activities = [a for a in self.activities().values() if a._endframe >= minframe] for d in activitydets: for a in activities: if (a._label == d._label) and (a._actorid == d._actorid) and a.hasoverlap(d, activityiou): a.union(d) # activity assignment assigned.add(d._id) break # assigned, early exit if activitynms: minframe = min([a._startframe for a in activitydets]) activities = sorted([a for a in self.activities().values() if a._endframe >= minframe], key=lambda a: a.confidence(), reverse=True) for d in sorted(activitydets, key=lambda x: x.confidence(), reverse=True): for a in activities: if (a._label == d._label) and (a._actorid == d._actorid) and a.hasoverlap(d, activityiou): assigned.add(a._id if d.confidence()>a.confidence() else d._id) # suppressed for id in assigned: if id in self._activities: del self._activities[id] # suppression, faster than self.activityfilter(lambda a: a.id() in assigned) # Activity construction from unassigned detections for d in activitydets: if d._id not in assigned: self.add(d.clone()) return self
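A sketch of a detector-in-the-loop tracking loop built on assign(); `detect` is a hypothetical detector producing `vipy.object.Detection` objects with confidences:

```python
# Assumes: v is a vipy.video.Scene and detect() is a hypothetical per-frame detector
for (k, im) in enumerate(v.stream()):
    dets = detect(im)                             # [vipy.object.Detection(...), ...], each with confidence()
    v.assign(k, dets, minconf=0.2, maxhistory=5)  # greedy SORT-style assignment, in-place track update
print(v.tracklist())                              # tracks accumulated by assignment and spawning
```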
def binarymask(self)
-
Replace all pixels in foreground boxes with white, zero in background
def binarymask(self): """Replace all pixels in foreground boxes with white, zero in background""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.binarymask() # shared numpy array return self
def blurmask(self, radius=7)
-
Replace all pixels in foreground boxes with gaussian blurred foreground
def blurmask(self, radius=7): """Replace all pixels in foreground boxes with gaussian blurred foreground""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.blurmask(radius) # shared numpy array return self
def categories(self)
-
Alias for labels()
def categories(self): """Alias for labels()""" return self.labels()
def clear(self)
-
Remove all activities and tracks from this object
def clear(self): """Remove all activities and tracks from this object""" self._activities = {} self._tracks = {} return self
def clearactivities(self)
-
def clearactivities(self): self._activities = {} return self
def cleartracks(self)
-
def cleartracks(self): self._tracks = {} return self
def clip(self, startframe, endframe=None)
-
Clip the video to between (startframe, endframe). This clip is relative to the current clip, as shown by __repr__().
Args
startframe
- [int] the start frame relative to the video framerate() for the clip
endframe
- [int] the end frame relative to the video framerate for the clip, may be none
Returns
This video object, clipped so that a load() will result in frame=0 equivalent to startframe. All tracks and activities updated relative to the new startframe.
Note
- This returns a clone of the video for idempotence
- This does not load the video. This updates the ffmpeg filter chain to temporally trim the video. See self.commandline() for the updated filter chain to run.
def clip(self, startframe, endframe=None): """Clip the video to between (startframe, endframe). This clip is relative to clip() shown by __repr__(). Args: startframe: [int] the start frame relative to the video framerate() for the clip endframe: [int] the end frame relative to the video framerate for the clip, may be none Returns: This video object, clipped so that a load() will result in frame=0 equivalent to startframe. All tracks and activities updated relative to the new startframe. .. note: - This returns a clone of the video for idempotence - This does not load the video. This updates the ffmpeg filter chain to temporally trim the video. See self.commandline() for the updated filter chain to run. """ assert (endframe is None or startframe <= endframe) and startframe >= 0, "Invalid start and end frames (%s, %s)" % (str(startframe), str(endframe)) v = self.clone() if not v.isloaded(): # -- Copied from super().clip() to allow for clip on clone (for idempotence) # -- This code copy is used to avoid super(Scene, self.clone()) which screws up class inheritance for iPython reload assert not v.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" timestamp_in_seconds = ((v._startframe if v._startframe is not None else 0)+startframe)/float(v.framerate()) v._update_ffmpeg_seek(timestamp_in_seconds) if endframe is not None: v._ffmpeg = v._ffmpeg.setpts('PTS-STARTPTS') # reset timestamp to 0 before trim filter v._ffmpeg = v._ffmpeg.trim(start=0, end=(endframe-startframe)/self.framerate()) # must be in seconds to allow for framerate conversion v._ffmpeg = v._ffmpeg.setpts('PTS-STARTPTS') # reset timestamp to 0 after trim filter v._startframe = startframe if v._startframe is None else v._startframe + startframe # for __repr__ only v._endframe = (v._startframe + (endframe-startframe)) if endframe is not None else v._endframe # for __repr__ only # -- end copy else: endframe = endframe if endframe is not None else len(self._array) v._array = self._array[startframe:endframe] (v._startframe, v._endframe) = (0, endframe-startframe) v._tracks = {k:t.offset(dt=-startframe).truncate(startframe=0, endframe=(endframe-startframe) if endframe is not None else None) for (k,t) in v.tracks().items()} # may be degenerate v._activities = {k:a.offset(dt=-startframe).truncate(startframe=0, endframe=(endframe-startframe) if endframe is not None else None) for (k,a) in v.activities().items()} # may be degenerate return v.trackfilter(lambda t: len(t)>0).activityfilter(lambda a: len(a)>0) # remove degenerate tracks and activities
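A brief sketch; clip() is lazy, so the trim is applied in the ffmpeg filter chain and only decoded at load():

```python
# Assumes: v is a vipy.video.Scene backed by a long enough video file
c = v.clip(100, 200)  # clone clipped to frames [100, 200); annotations shifted so clip frame 0 == source frame 100
c.load()              # decodes only the trimmed segment
assert len(c) == 100  # holds when the source has at least 200 frames
```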
def combine(self, other, tracks=True, activities=True, rekey=True)
-
Combine the activities and tracks from both scenes into self.
Note: This does not perform a union, it simply combines dictionaries. For deduplication, see Scene.union().
def combine(self, other, tracks=True, activities=True, rekey=True): """Combine the activities and tracks from both scenes into self. .. note:: This does not perform a union, it simply combines dictionaries. For deduplication, see `vipy.video.Scene.union` """ assert isinstance(other, Scene), "Invalid input - must be vipy.video.Scene() object and not type=%s" % str(type(other)) assert self.framerate() == other.framerate() o = other.clone(rekey=True) if rekey else other # make sure keys are unique if activities: self.activities().update(o.activities()) if tracks: self.tracks().update(o.tracks()) return self
def crop(self, bb, zeropad=True)
-
Crop the video using the supplied box, update tracks relative to crop, video is zeropadded if box is outside frame rectangle
def crop(self, bb, zeropad=True): """Crop the video using the supplied box, update tracks relative to crop, video is zeropadded if box is outside frame rectangle""" assert isinstance(bb, vipy.geometry.BoundingBox), "Invalid input" bb = bb.int() bbc = bb.clone().imclipshape(self.width(), self.height()).int() #if zeropad and bb != bbc: # self.zeropad(bb.width()-bbc.width(), bb.height()-bbc.height()) # bb = bb.offset(bb.width()-bbc.width(), bb.height()-bbc.height()) super().crop(bb, zeropad=zeropad) # range check handled here to correctly apply zeropad bb = bb if zeropad else bbc self._tracks = {k:t.offset(dx=-bb.xmin(), dy=-bb.ymin()) for (k,t) in self.tracks().items()} return self
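A short sketch of an actor-centered spatial crop, reusing the box helpers documented elsewhere in this module:

```python
# Assumes: v is a vipy.video.Scene with at least one object in frame 0
bb = v.frame(0).boundingbox().maxsquare().dilate(1.2).int()  # square union box of frame-0 objects
vc = v.crop(bb, zeropad=True)  # tracks are offset into crop coordinates; zero-padded if bb leaves the frame
```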
def csv(self, outfile=None)
-
Export scene to CSV file format with header. If there are no tracks, this will be empty.
def csv(self, outfile=None): """Export scene to CSV file format with header. If there are no tracks, this will be empty. """ assert self.load().isloaded() csv = [(self.filename(), # video filename k, # frame number (zero indexed) d.category(), d.shortlabel(), # track category and shortlabel (displayed in caption) ';'.join([self.activities(id=aid).category() for aid in tolist(d.attributes['activityid'])] if 'activityid' in d.attributes else ''), # semicolon separated activity category associated with track d.xmin(), d.ymin(), d.width(), d.height(), # bounding box d.attributes['trackid'], # globally unique track ID ';'.join([aid for aid in tolist(d.attributes['activityid'])] if 'activityid' in d.attributes else '')) # semicolon separated activity ID associated with track for (k,im) in enumerate(self) for d in im.objects()] csv = [('# video_filename', 'frame_number', 'object_category', 'object_shortlabel', 'activity categories(;)', 'xmin', 'ymin', 'width', 'height', 'track_id', 'activity_ids(;)')] + csv return writecsv(csv, outfile) if outfile is not None else csv
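A minimal sketch; the output path is hypothetical:

```python
# Assumes: v is an annotated vipy.video.Scene
rows = v.csv()                    # in-memory list of tuples, header row first
v.csv(outfile='annotations.csv')  # hypothetical output path, one row per (frame, detection)
```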
def dedupe(self, spatial_iou_threshold=0.8, dt=5, tracks=True, activities=True, temporal_iou_threshold=0.8, verbose=True)
-
Find and delete duplicate tracks and activities by overlap.
Track deduplication algorithm
- For each pair of tracks with the same category, find the largest temporal segment that contains both tracks.
- For this segment, compute the IOU for each box interpolated at a stride of dt frames
- Compute the mean IOU for this segment. This is the segment IOU.
- If the segment IOU is greater than the threshold, merge the shorter of the two tracks with the current track.
Activity deduplication algorithm
- For each pair of activities in insertion order
- If the temporal IOU is greater than the threshold, then merge the older activity (later insertion) with the newer activity (earlier insertion)
- Update the actor ID of the merged activity to be that of the newer activity
def dedupe(self, spatial_iou_threshold=0.8, dt=5, tracks=True, activities=True, temporal_iou_threshold=0.8, verbose=True): """Find and delete duplicate tracks and activities by overlap. Track deduplication algorithm - For each pair of tracks with the same category, find the largest temporal segment that contains both tracks. - For this segment, compute the IOU for each box interpolated at a stride of dt frames - Compute the mean IOU for this segment. This is the segment IOU. - If the segment IOU is greater than the threshold, merge the shorter of the two tracks with the current track. Activity deduplication algorithm - For each pair of activities in insertion order - If the temporal IOU is greater than the threshold, then merge the older activity (later insertion) with the newer activity (earlier insertion) - Update the actor ID of the merged activity to be that of the newer activity """ if tracks: deleted = set([]) for tj in sorted(self.tracklist(), key=lambda t: len(t), reverse=True): # longest to shortest for (s, ti) in sorted([(0,t) if (len(tj) < len(t) or t.id() in deleted or t.id() == tj.id() or t.category() != tj.category()) else (tj.fragmentiou(t, dt=dt), t) for t in self.tracklist()], key=lambda x: x[0], reverse=True): if s > spatial_iou_threshold: # best mean framewise overlap during overlapping segment of two tracks (ti, tj) if verbose: print('[vipy.video.dedupe]: merging duplicate track "%s" (id=%s) which overlaps with "%s" (id=%s)' % (ti, ti.id(), tj, tj.id())) self.tracks()[tj.id()] = tj.union(ti) # merge self.activitymap(lambda a: a.replace(ti, tj)) # replace merged track reference in activity deleted.add(ti.id()) self.trackfilter(lambda t: t.id() not in deleted) # remove duplicate tracks if activities: deleted = set([]) for (j,aj) in enumerate(self.activitylist()): # preserve insertion order for ai in self.activitylist()[j+1:]: if aj.hasoverlap(ai, threshold=temporal_iou_threshold) and ai.id() not in deleted: if verbose: print('[vipy.video.dedupe]: merging duplicate activity "%s" (id=%s) which overlaps with "%s" (id=%s)' % (ai, ai.id(), aj, aj.id())) self.activities()[aj.id()] = aj.union(ai.clone().replaceid(ai.actorid(), aj.actorid())).addid(ai.actorid()) # merge two activities into one, with two tracks deleted.add(ai.id()) self.activityfilter(lambda a: a.id() not in deleted) # remove duplicate activities return self
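A one-line usage sketch with the default thresholds made explicit:

```python
# Assumes: v is a vipy.video.Scene with possibly duplicated tracks and activities
v = v.dedupe(spatial_iou_threshold=0.8, dt=5, temporal_iou_threshold=0.8, verbose=True)
```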
def delete(self, id)
-
Delete a given track or activity by id, if present
def delete(self, id): """Delete a given track or activity by id, if present""" return self.trackfilter(lambda t: t.id() != id).activityfilter(lambda a: a.id() != id)
def downcast(self)
-
Cast the object to a Video class.
def downcast(self): """Cast the object to a `vipy.video.Video` class""" self.__class__ = vipy.video.Video return self
def during(self, frameindex)
-
def during(self, frameindex): try: self.__getitem__(frameindex) # triggers load return True except: return False
def extrapolate(self, f, dt=None)
-
Extrapolate the video to frame f and add the extrapolated tracks to the video
def extrapolate(self, f, dt=None): """Extrapolate the video to frame f and add the extrapolated tracks to the video""" return self.trackmap(lambda t: t.add(f, t.linear_extrapolation(f, dt=dt if dt is not None else self.framerate()), strict=False))
def fgmask(self)
-
Replace all pixels in foreground boxes with zero
def fgmask(self): """Replace all pixels in foreground boxes with zero""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.fgmask() # shared numpy array return self
def first_activity(self)
-
Return the first activity of the video with the earliest start frame
def first_activity(self): """Return the first activity of the video with the earliest start frame""" return sorted(self.activitylist(), key=lambda a: a.startframe())[0] if len(self._activities)>0 else None
def frame(self, k=0, img=None, noimage=False, t=None)
-
Return a Scene object at frame k.
The attributes of each of the Scene.objects() in the scene contain helpful metadata for the provenance of the detection, including:
- 'trackid' of the track for this detection
- 'activityid' associated with this detection
- 'jointlabel' of this detection, used for visualization
- 'noun verb' of this detection, used for visualization
Args
k
- [int >= 0] The frame index requested. This is relative to the current frame rate of the video.
img
- [numpy, None] An optional image to be used for this frame. This is useful to construct frames efficiently for videos if the pixel buffer is already available from a stream rather than a preview.
noimage
- [bool] If True, then return only annotations at frame k with an empty frame buffer (e.g. no image pixels in the returned image object)
t
- [float >= 0] The frame time requested. This is converted into a frame index using the current framerate of the video.
Returns
A Scene object for frame k containing all objects in this frame, and pixels if img != None or preview=True
Note
- Modifying this frame will not affect the source video
- If multiple objects are associated with an activity and a primary actor is defined, then only the primary actor is displayed as "Noun Verbing", objects are shown as "Noun" with the activityid in the attribute
- If a noun is associated with more than one activity, then this is shown as "Noun Verbing1\nNoun Verbing2", with a newline separator
def frame(self, k=0, img=None, noimage=False, t=None): """Return `vipy.image.Scene` object at frame k The attributes of each of the `vipy.image.Scene.objects` in the scene contains helpful metadata for the provenance of the detection, including: - 'trackid' of the track this detection - 'activityid' associated with this detection - 'jointlabel' of this detection, used for visualization - 'noun verb' of this detection, used for visualization Args: k: [int >= 0] The frame index requested. This is relative to the current frame rate of the video. img: [numpy, None] An optional image to be used for this frame. This is useful to construct frames efficiently for videos if the pixel buffer is already available from a stream rather than a preview. noimage [bool]: If True, then return only annotations at frame k with empty frame buffer (e.g. no image pixels in the returned image object) t: [float >= 0] The frame time requested. This is converted into a frame index using the current framerate of the video. Return: A `vipy.image.Scene` object for frame k containing all objects in this frame and pixels if img != None or preview=True .. note:: - Modifying this frame will not affect the source video - If multiple objects are associated with an activity and a primary actor is defined, then only the primary actor is displayed as "Noun Verbing", objects are shown as "Noun" with the activityid in the attribute - If noun is associated with more than one activity, then this is shown as "Noun Verbing1\nNoun Verbing2", with a newline separator """ k = int(self.framerate()*t) if t is not None else k assert isinstance(k, int) and k>=0, "Frame index must be non-negative integer" assert img is not None or (self.isloaded() and k<len(self)) or not self.isloaded(), "Invalid frame index %d - Indexing video by frame must be integer within (0, %d)" % (k, len(self)-1) img = img if (img is not None or noimage) else (self._array[k] if self.isloaded() else self.preview(k).array()) dets = [t[k].clone(deep=True).setattribute('trackindex', j) for (j, t) in enumerate(self.tracklist()) if len(t)>0 and (t.during(k) or t.boundary()=='extend')] # track interpolation (cloned) with boundary handling for d in dets: d.attributes['activityid'] = [] # reset jointlabel = [(d.shortlabel(),'')] # [(Noun, Verbing1), (Noun, Verbing2), ...], initialized with empty verbs as [(Noun, ""), ... ] activityconf = [None] # for display for (aid, a) in self.activities().items(): # insertion order: First activity is primary, next is secondary (not in confidence order) if a.hastrack(d.attributes['trackid']) and a.during(k): # Display assumptions: # - Jointlabel is always displayed as "Noun Verbing" during activity (e.g. Person Carrying, Vehicle Turning) using noun=track shortlabel, verb=activity shortlabel # - If noun is associated with more than one activity, then this is shown as "Noun Verbing1\nNoun Verbing2", with a newline separator # - If multiple objects are associated with an activity and a primary actor is defined, then only the primary actor is displayed as "Noun Verbing", objects are shown as "Noun" with the activityid in the attributes if (a.actorid() is None or (a.actorid() == d.attributes['trackid'])) and not any([a.shortlabel() == v for (n,v) in jointlabel]): jointlabel.append( (d.shortlabel(), a.shortlabel()) ) # only show each activity once (even if repeated) activityconf.append(a.confidence()) d.attributes['activityid'].append(a.id()) # for activity correspondence (if desired) # For display purposes # - See `vipy.image.mutator_show_trackindex_verbonly` # - Double prepended underscore attributes are private and cleaned using `vipy.image.Image.sanitize` d.attributes['__jointlabel'] = '\n'.join([('%s %s' % (n,v)).strip() for (n,v) in jointlabel[0 if len(jointlabel)==1 else 1:]]) # to be deprecated d.attributes['__noun verb'] = jointlabel[0 if len(jointlabel)==1 else 1:] d.attributes['__activityconf'] = activityconf[0 if len(jointlabel)==1 else 1:] d.attributes['__trackindex'] = d.attributes['trackindex'] # trackindex to be replaced with __trackindex d.attributes['__trackid'] = d.attributes['trackid'] # trackid to be replaced with __trackid d.attributes['__activityid'] = d.attributes['activityid'] # activityid to be replaced with __activityid dets.sort(key=lambda d: (d.confidence() if d.confidence() is not None else 0, d.shortlabel())) # layering in video is ordered by decreasing track confidence and alphabetical shortlabel return vipy.image.Scene(array=img, colorspace=self.colorspace(), objects=dets, category=self.category())
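A sketch of annotation-only access, which avoids decoding pixels by passing noimage=True:

```python
# Assumes: v is an annotated vipy.video.Scene
im = v.frame(k=10, noimage=True)  # vipy.image.Scene with interpolated boxes, no pixel buffer
for d in im.objects():
    print(d.category(), d.attributes['trackid'], d.attributes['activityid'])
```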
def framebox(self)
-
Return the bounding box for the image rectangle.
Returns
A BoundingBox which defines the image rectangle.
Note: This requires calling Video.preview() to get the frame shape from the current filter chain, which touches the video file.
def framebox(self): """Return the bounding box for the image rectangle. Returns: A `vipy.geometry.BoundingBox` which defines the image rectangle .. notes: This requires calling `vipy.video.Video.preview` to get the frame shape from the current filter chain, which touches the video file""" return vipy.geometry.BoundingBox(xmin=0, ymin=0, width=self.width(), height=self.height())
def framecomposite(self, n=2, dt=10, mindim=256)
-
Generate a single composite image with minimum dimension mindim as the uniformly blended composite of n frames each separated by dt frames
def framecomposite(self, n=2, dt=10, mindim=256): """Generate a single composite image with minimum dimension mindim as the uniformly blended composite of n frames each separated by dt frames""" if not self.isloaded(): self.mindim(mindim).load() imframes = [self.frame(k).maxmatte() for k in range(0, dt*n, dt)] img = np.uint8(np.sum([1/float(n)*im.array() for im in imframes], axis=0)) return imframes[0].clone().array(img)
def framerate(self, fps=None)
-
Change the input framerate for the video and update frame indexes for all annotations.
fps = self.framerate()
self.framerate(fps=15.0)
def framerate(self, fps=None): """Change the input framerate for the video and update frame indexes for all annotations. ```python fps = self.framerate() self.framerate(fps=15.0) ``` """ if fps is None: return self._framerate elif float(fps) == self._framerate: return self else: assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" fps = float(fps) self._startframe = int(round(self._startframe * (fps/self._framerate))) if self._startframe is not None else self._startframe # __repr__ only self._endframe = int(round(self._endframe * (fps/self._framerate))) if self._endframe is not None else self._endframe # __repr__only self._tracks = {k:t.framerate(fps) for (k,t) in self.tracks().items()} self._activities = {k:a.framerate(fps) for (k,a) in self.activities().items()} if 'fps=' in self._ffmpeg_commandline(): self._update_ffmpeg('fps', fps) # replace fps filter, do not add to it else: self._ffmpeg = self._ffmpeg.filter('fps', fps=fps, round='up') # create fps filter first time self._framerate = fps return self
def hasactivities(self)
-
Does this video have any activities?
def hasactivities(self): """Does this video have any activities?""" return len(self._activities) > 0
def hasactivity(self, activityid)
-
Does this video have this activity id?
def hasactivity(self, activityid): """Does this video have this activity id?""" return activityid in self.activities()
def hastrack(self, trackid)
-
Does the video have this trackid?
Note: Track IDs are available as vipy.object.Track().id()
def hastrack(self, trackid): """Does the video have this trackid? .. note:: Track IDs are available as vipy.object.Track().id() """ return trackid in self.tracks()
def hastracks(self)
-
Does this video have any tracks?
def hastracks(self): """Does this video have any tracks?""" return len(self._tracks) > 0
def instanceid(self, newid=None)
-
Return an annotation instance identifier for this video.
An instance ID is a unique identifier for a ground truth annotation within a video, either a track or an activity. More than one instance ID may share the same video ID if they are from the same source videofile.
This is useful when calling Scene.activityclip() or Scene.activitysplit() to clip a video into segments such that each clip has a unique identifier, but all share the same underlying Video.videoid(). This is useful when calling Scene.trackclip() or Scene.tracksplit() to clip a video into segments such that each clip has a unique identifier, but all share the same underlying Video.videoid().
INSTANCEID
- if 'instance_id' key is in self.attributes
VIDEOID_INSTANCEID
- if '_instance_id' key is in self.attributes, as set by activityclip() or trackclip(). This is set using INSTANCE_ID=ACTIVITYID_ACTIVITYINDEX or INSTANCEID=TRACKID_TRACKINDEX, where the index is the temporal order of the annotation in the source video prior to clip().
VIDEOID_ACTIVITYINDEX
- if 'activityindex' key is in self.attributes, as set by activityclip(). (fallback for legacy datasets).
VIDEOID
- otherwise
def instanceid(self, newid=None): """Return an annotation instance identifier for this video. An instance ID is a unique identifier for a ground truth annotation within a video, either a track or an activity. More than one instance ID may share the same video ID if they are from the same source videofile. This is useful when calling `vipy.video.Scene.activityclip` or `vipy.video.Scene.activitysplit` to clip a video into segments such that each clip has a unique identifier, but all share the same underlying `vipy.video.Video.videoid`. This is useful when calling `vipy.video.Scene.trackclip` or `vipy.video.Scene.tracksplit` to clip a video into segments such that each clip has a unique identifier, but all share the same underlying `vipy.video.Video.videoid`. Returns: INSTANCEID: if 'instance_id' key is in self.attributes VIDEOID_INSTANCEID: if '_instance_id' key is in self.attributes, as set by activityclip() or trackclip(). This is set using INSTANCE_ID=ACTIVITYID_ACTIVITYINDEX or INSTANCEID=TRACKID_TRACKINDEX, where the index is the temporal order of the annotation in the source video prior to clip(). VIDEOID_ACTIVITYINDEX: if 'activityindex' key is in self.attributes, as set by activityclip(). (fallback for legacy datasets). VIDEOID: otherwise """ if newid is not None: self.setattribute('instance_id', newid) return self else: if 'instance_id' in self.attributes: return self.attributes['instance_id'] # set at video creation time (e.g. pycollector) elif '_instance_id' in self.attributes: return self.attributes['_instance_id'] # set at activityclip() time for provenance from clips back to videos elif 'activityindex' in self.attributes: return '%s_%s' % (self.videoid(), str(self.attributes['activityindex'])) # set at activityclip() time for provenance from clips back to videos (deprecated) else: return self.videoid()
def isdegenerate(self)
-
Degenerate scene has empty or malformed tracks
Expand source code Browse git
def isdegenerate(self): """Degenerate scene has empty or malformed tracks""" return len(self.tracklist()) == 0 or any([t.isempty() or t.isdegenerate() for t in self.tracklist()])
def json(self, encode=True)
-
Return a JSON encoded string of this object. This may fail if attributes contain non-JSON-encodable objects.
Expand source code Browse git
def json(self, encode=True): """Return JSON encoded string of this object. This may fail if attributes contain non-json encodeable object""" try: json.loads(json.dumps(self.attributes)) # round trip for the attributes dictionary - this can be any arbitrary object and contents may not be json encodable except: raise ValueError('Video contains non-JSON encodable object in self.attributes dictionary - Try self.sanitize() or to clear with self.attributes = {} first') d = json.loads(super().json()) d['tracks'] = {k:t.json(encode=False) for (k,t) in self.tracks().items()} d['activities'] = {k:a.json(encode=False) for (k,a) in self.activities().items()} try: return json.dumps(d) if encode else d except: # Legacy support for non JSON serializable objects (<= vipy.1.9.2) v = self.clone() for (ti, t) in v.tracks().items(): for o in t._keyboxes: vipy.geometry.BoundingBox.cast(o, flush=True) o.float().significant_digits(2) for (ai, a) in v.activities().items(): a._startframe = int(a._startframe) a._endframe = int(a._endframe) return v.json(encode=encode)
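A minimal round-trip sketch; `Scene.from_json` is assumed here, as referenced by pack() below:
```python
import vipy.video

v = vipy.video.RandomScene()         # synthetic annotated test scene
s = v.json()                         # raises ValueError if self.attributes is not JSON-encodable
u = vipy.video.Scene.from_json(s)    # reconstruct the scene with tracks and activities
assert u.videoid() == v.videoid()
```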
def label(self)
-
Return an iterator over labels in each frame
Expand source code Browse git
def label(self): """Return an iterator over labels in each frame""" endframe = max([a.endframe() for a in self.activitylist()]+[t.endframe() for (tk,t) in self.tracks().items()]) if (len(self._tracks) > 0 or len(self._activities) > 0) else 0 for k in range(0,endframe): yield self.labels(k)
def labeled_frames(self)
-
Iterate over frames, yielding tuples (activity+object labelset in scene, vipy.image.Scene())
Expand source code Browse git
def labeled_frames(self): """Iterate over frames, yielding tuples (activity+object labelset in scene, vipy.image.Scene())""" self.load() for k in range(0, len(self)): #self._currentframe = k # used only for incremental add() yield (self.labels(k), self.__getitem__(k)) #self._currentframe = None
def labels(self, k=None)
-
Return a set of all object and activity labels in this scene, or at frame int(k)
Expand source code Browse git
def labels(self, k=None): """Return a set of all object and activity labels in this scene, or at frame int(k)""" return self.activitylabels(k).union(self.objectlabels(k))
def last_activity(self)
-
Return the last activity of the video with the latest end frame
Expand source code Browse git
def last_activity(self): """Return the last activity of the video with the latest end frame""" return sorted(self.activitylist(), key=lambda a: a.endframe())[-1] if len(self._activities)>0 else None
def meanmask(self)
-
Replace all pixels in foreground boxes with mean color
Expand source code Browse git
def meanmask(self): """Replace all pixels in foreground boxes with mean color""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.meanmask() # shared numpy array return self
def merge_tracks(self, dilate_height=2.0, dilate_width=2.0, framedist=5)
-
Merge tracks if a track endpoint dilated by a fraction overlaps exactly one track startpoint, and the endpoint and startpoint are close enough together temporally.
Note
- This is useful for continuing tracking when the detection framerate was too low and the assignment falls outside the measurement gate.
- This will not work for complex scenes, as it assumes that there is exactly one possible continuation for a track.
Expand source code Browse git
def merge_tracks(self, dilate_height=2.0, dilate_width=2.0, framedist=5): """Merge tracks if a track endpoint dilated by a fraction overlaps exactly one track startpoint, and the endpoint and startpoint are close enough together temporally. .. note:: - This is useful for continuing tracking when the detection framerate was too low and the assignment falls outside the measurement gate. - This will not work for complex scenes, as it assumes that there is exactly one possible continuation for a track. """ merged = set([]) for ti in sorted(self.tracklist(), key=lambda t: t.startframe()): for tj in sorted(self.tracklist(), key=lambda t: t.startframe()): if (tj.id() not in merged) and (ti.id() != tj.id()) and (tj.startframe() >= ti.endframe()) and ((tj.startframe()-ti.endframe()) <= framedist) and (ti.category() == tj.category()): di = ti[ti.endframe()].dilate_height(dilate_height).dilate_width(dilate_width) dj = tj[tj.startframe()] if di.iou(dj) > 0 and not any([di.iou(tk[tj.startframe()]) > 0 for tk in self.tracklist() if (tk.id() not in [ti.id(), tj.id()]) and tk.during(tj.startframe())]): self.tracks()[ti.id()] = ti.union(tj) # Merge tracks that are within gating distance self.delete(tj.id()) # remove merged track merged.add(tj.id()) break return self
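A minimal sketch of relinking track fragments after low-framerate detection; the thresholds are illustrative, not recommendations:
```python
import vipy.video

v = vipy.video.RandomScene()                              # synthetic annotated test scene
n = len(v.tracklist())
v.merge_tracks(dilate_height=2.0, dilate_width=2.0, framedist=10)   # in-place merge
assert len(v.tracklist()) <= n                            # fragments may have been unioned and deleted
```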
def next_activity(self, id)
-
Return the next activity just after the given activityid
Expand source code Browse git
def next_activity(self, id): """Return the next activity just after the given activityid""" assert id in self.activities() A = self.activitylist() k = [k for (k,a) in enumerate(A) if a.id() == id][0] return A[k+1] if k<len(A)-1 else None
def noactivityclip(self, label=None, shortlabel=None, padframes=0)
-
Return a list of vipy.video.Scene() each clipped on a track segment that has no associated activities.
Args
label
- [str] The activity label to give the background activities. Defaults to the track category (lowercase)
shortlabel
- [str] The activity shortlabel to give the background activities when visualized. Defaults to the track category (lowercase)
padframes
- [int]
The amount of temporal padding to apply to the clips before and after in frames.
See
Scene.activityclip()
for options.
Returns
A list of
Scene
each cloned from the source video and clipped in the temporal region between activities. The union of activityclip() and noactivityclip() should equal the entire video.
Notes
- Each clip will contain exactly one activity "Background" which is the interval for this track where no activities are occurring
- Each clip will be at least one frame long
Expand source code Browse git
def noactivityclip(self, label=None, shortlabel=None, padframes=0): """Return a list of vipy.video.Scene() each clipped on a track segment that has no associated activities. Args: label: [str] The activity label to give the background activities. Defaults to the track category (lowercase) shortlabel: [str] The activity shortlabel to give the background activities when visualized. Defaults to the track category (lowercase) padframes: [int] The amount of temporal padding to apply to the clips before and after in frames. See `vipy.video.Scene.activityclip` for options. Returns: A list of `vipy.video.Scene` each cloned from the source video and clipped in the temporal region between activities. The union of activityclip() and noactivityclip() should equal the entire video. .. note:: - Each clip will contain exactly one activity "Background" which is the interval for this track where no activities are occurring - Each clip will be at least one frame long """ A = self.clone().activities(self.noactivitylist(label=label, shortlabel=shortlabel)).activityclip(padframes=padframes, multilabel=False) return [a.setattribute('_instance_id', '%s_bg' % a.getattribute('_instance_id')) for a in A]
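A minimal sketch: the foreground clips plus the background clips should together tile the annotated video ('Background' is an illustrative label choice):
```python
import vipy.video

v = vipy.video.RandomScene()                 # synthetic annotated test scene
fg = v.activityclip()                        # one clip per labeled activity
bg = v.noactivityclip(label='Background')    # track segments with no activity
clips = fg + bg                              # together these should cover the video
```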
def noactivitylist(self, label=None, shortlabel=None)
-
Return a list of
Activity
which are segments of each track with no associated activities.
Args
label
- [str] The activity label to give the background activities. Defaults to the track category (lowercase)
Returns
A list of
Activity
such that each activity is associated with a track with temporal support where no activities are performed. The union of activitylist() and noactivitylist() should cover the temporal support of all tracks.
Expand source code Browse git
def noactivitylist(self, label=None, shortlabel=None): """Return a list of `vipy.activity.Activity` which are segments of each track with no associated activities. Args: label: [str] The activity label to give the background activities. Defaults to the track category (lowercase) Returns: A list of `vipy.activity.Activity` such that each activity is associated with a track with temporal support where no activities are performed. The union of activitylist() and noactivitylist() should cover the temporal support of all track """ A = [] for t in self.tracklist(): (startframe, endframe) = (t.startframe(), t.startframe()) for k in range(t.startframe(), t.endframe()): if not any([a.hastrack(t) and a.during(k) for a in self.activitylist()]) and k < (t.endframe()-1): endframe = k # background else: if startframe < endframe: A.append(vipy.activity.Activity(label=t.category() if label is None else label, shortlabel='' if label is None else (label if shortlabel is None else shortlabel), startframe=startframe, endframe=endframe, actorid=t.id(), framerate=self.framerate(), attributes={'_noactivitylist':True})) (startframe, endframe) = (k+1,k+1) return A
def objectlabels(self, k=None, lower=False)
-
Return a set of all object categories in this scene, or at frame k.
Args
k
- [int] The object labels present at frame k. If k=None, then all object labels in the video
lower
- [bool] If true, return the object labels in all lower case for case-invariant string comparisons
Expand source code Browse git
def objectlabels(self, k=None, lower=False): """Return a set of all object categories in this scene, or at frame k. Args: k: [int] The object labels present at frame k. If k=None, then all object labels in the video lower: [bool] If true, return the object labels in all lower case for case-invariant string comparisons """ return set([t.category() if not lower else t.category().lower() for t in self.tracks().values() if k is None or t.during(k)])
def objects(self, casesensitive=True)
-
The objects in a scene are the unique categories of tracks
Expand source code Browse git
def objects(self, casesensitive=True): """The objects in a scene are the unique categories of tracks""" return sorted(list(set([t.category() if casesensitive else t.category().lower() for t in self.tracklist()])))
def pack(self)
-
Packing a scene returns the scene with the annotations JSON serialized.
- This is useful for fast garbage collection when there are many objects in memory
- This is useful for distributed processing prior to serializing from a scheduler to a client
- This is useful for lazy deserialization of complex attributes when loading many videos into memory
- Unpacking is transparent to the end user and is performed on the fly when annotations are accessed. There is no unpack() method.
- See the notes in from_json() for why this helps with nested containers and reference cycle tracking with the python garbage collector
Expand source code Browse git
def pack(self): """Packing a scene returns the scene with the annotations JSON serialized. - This is useful for fast garbage collection when there are many objects in memory - This is useful for distributed processing prior to serializing from a scheduler to a client - This is useful for lazy deserialization of complex attributes when loading many videos into memory - Unpacking is transparent to the end user and is performed on the fly when annotations are accessed. There is no unpack() method. - See the notes in from_json() for why this helps with nested containers and reference cycle tracking with the python garbage collector """ d = json.loads(self.json()) self._tracks = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['tracks'].values()]) # efficient garbage collection: store as a packed string to avoid reference cycle tracking, unpack on demand self._activities = tuple([x if isinstance(x, str) else str(json.dumps(x)) for x in d['activities'].values()]) # efficient garbage collection: store as a packed string to avoid reference cycle tracking, unpack on demand return self
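A minimal sketch of packing before serialization for distributed processing (dill is imported by this module); unpacking happens lazily on first access:
```python
import dill
import vipy.video

v = vipy.video.RandomScene().pack()   # tracks/activities stored as packed JSON strings
b = dill.dumps(v)                     # cheap to serialize and garbage collect
u = dill.loads(b)
tracks = u.tracks()                   # transparently unpacked on demand
```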
def pixelate(self, radius=16)
-
Alias for pixelmask()
Expand source code Browse git
def pixelate(self, radius=16): """Alias for pixelmask()""" return self.pixelmask(pixelsize=radius)
def pixelize(self, radius=16)
-
Alias for pixelmask()
Expand source code Browse git
def pixelize(self, radius=16): """Alias for pixelmask()""" return self.pixelmask(pixelsize=radius)
def pixelmask(self, pixelsize=8)
-
Replace all pixels in foreground boxes with pixelation (e.g. bigger pixels, like privacy glass)
Expand source code Browse git
def pixelmask(self, pixelsize=8): """Replace all pixels in foreground boxes with pixelation (e.g. bigger pixels, like privacy glass)""" for im in self.mutable(): # convert to writeable numpy array, triggers writeable copy im.pixelmask(pixelsize) # shared numpy array return self
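A minimal privacy-redaction sketch; 'input.mp4' and 'redacted.mp4' are hypothetical paths for a video with tracked boxes:
```python
import vipy.video

v = vipy.video.Scene(filename='input.mp4')          # hypothetical annotated video
v.load().pixelmask(pixelsize=16).saveas('redacted.mp4')   # pixelate all tracked boxes and save
```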
def prev_activity(self, id)
-
Return the previous activity just before the given activityid
Expand source code Browse git
def prev_activity(self, id): """Return the previous activity just before the given activityid""" assert id in self.activities() A = self.activitylist() k = [k for (k,a) in enumerate(A) if a.id() == id][0] return A[k-1] if k>=1 else None
def primary_activity(self)
-
Return the primary activity of the video.
- The primary activity is the first activity in the activitylist.
- This is useful for activityclip() videos that are centered on a single activity
Returns
Activity
that is first in the
Scene.activitylist()
Expand source code Browse git
def primary_activity(self): """Return the primary activity of the video. - The primary activity is the first activity in the activitylist. - This is useful for activityclip() videos that are centered on a single activity Returns: `vipy.activity.Activity` that is first in the `vipy.video.Scene.activitylist` """ return next(iter(self.activities().values())) if len(self._activities)>0 else None # Python >=3.6
def quicklook(self, n=9, dilate=1.5, mindim=256, fontsize=10, context=False, startframe=0, animate=False, dt=30, thumbnail=None)
-
Generate a montage of n uniformly spaced annotated frames, centered on the union of the labeled boxes in the current frame, to show the activity occurring in this scene at a glance. The montage increases row-wise for n uniformly spaced frames, starting from frame zero and ending on the last frame. This quicklook is most useful when len(self.activities())==1, for generating a quicklook from an activityclip().
Args
n
- [int]: Number of images in the quicklook
dilate
- [float]: The dilation factor for the bounding box prior to crop for display
mindim
- [int]: The minimum dimension of each of the elements in the montage
fontsize
- [int]: The size of the font for the bounding box label
context
- [bool]: If true, replace the first and last frame in the montage with the full frame annotation, to help show the scale of the scene
animate
- [bool]: If true, return a video constructed by animating the quicklook into a video by showing dt consecutive frames
dt
- [int]: The number of frames for animation
startframe
- [int]: The initial frame index to start the n uniformly sampled frames for the quicklook
thumbnail
- [vipy.image.Image]: If provided, prepend the first element in the montage with this thumbnail. This is useful for showing a high resolution image (e.g. a face, small object) contained in the video for review.
Expand source code Browse git
def quicklook(self, n=9, dilate=1.5, mindim=256, fontsize=10, context=False, startframe=0, animate=False, dt=30, thumbnail=None): """Generate a montage of n uniformly spaced annotated frames centered on the union of the labeled boxes in the current frame to show the activity occurring in this scene at a glance. Montage increases rowwise for n uniformly spaced frames, starting from frame zero and ending on the last frame. This quicklook is most useful when len(self.activities())==1 for generating a quicklook from an activityclip(). Args: n: [int]: Number of images in the quicklook dilate: [float]: The dilation factor for the bounding box prior to crop for display mindim: [int]: The minimum dimension of each of the elements in the montage fontsize: [int]: The size of the font for the bounding box label context: [bool]: If true, replace the first and last frame in the montage with the full frame annotation, to help show the scale of the scene animate: [bool]: If true, return a video constructed by animating the quicklook into a video by showing dt consecutive frames dt: [int]: The number of frames for animation startframe: [int]: The initial frame index to start the n uniformly sampled frames for the quicklook thumbnail [`vipy.image.Image`]: If provided, prepend the first element in the montage with this thumbnail. This is useful for showing a high resolution image (e.g. a face, small object) contained in the video for review. """ if not self.isloaded(): self.load() # triggers load() into memory, user should self.flush() to free if animate: return Video(frames=[self.quicklook(n=n, dilate=dilate, mindim=mindim, fontsize=fontsize, context=context, startframe=k, animate=False, dt=dt, thumbnail=thumbnail) for k in range(0, min(dt, len(self)))], framerate=self.framerate()) f_mutator = vipy.image.mutator_show_jointlabel() framelist = [min(int(np.round(f))+startframe, len(self)-1) for f in np.linspace(0, len(self)-1, n)] isdegenerate = [self.frame(k).boundingbox() is None or self.frame(k).boundingbox().dilate(dilate).intersection(self.framebox(), strict=False).isdegenerate() for (j,k) in enumerate(framelist)] imframes = [self.frame(k).maxmatte() # letterbox or pillarbox if (isdegenerate[j] or (context is True and (j == 0 or j == (n-1)))) else self.frame(k).padcrop(self.frame(k).boundingbox().dilate(dilate).imclipshape(self.width(), self.height()).maxsquare().int()).mindim(mindim, interp='nearest') for (j,k) in enumerate(framelist)] imframes = [f_mutator(im) for im in imframes] # show jointlabel from frame interpolation imframes = [im.savefig(fontsize=fontsize, figure=1).rgb() for im in imframes] # temp storage in memory if thumbnail is not None: assert isinstance(thumbnail, vipy.image.Image) imframes = [thumbnail.maxmatte().mindim(mindim)] + imframes return vipy.visualize.montage(imframes, imgwidth=mindim, imgheight=mindim)
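A minimal review-workflow sketch, assuming the montage returned behaves like a `vipy.image.Image` (it is produced by vipy.visualize.montage):
```python
import vipy.video

for c in vipy.video.RandomScene().activityclip():
    im = c.quicklook(n=9, context=True)   # 3x3 montage; first/last cells show the full frame for scale
    im.show()                             # or save it for offline review
```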
def rekey(self, tracks=None, activities=None)
-
Change the track and activity IDs to randomly assigned UUIDs. Useful for cloning unique scenes.
v = vipy.video.RandomScene()
v.rekey()                             # randomly rekey all track and activity IDs
v.rekey(tracks={...})                 # rekey tracks (oldkey -> newkey) according to dictionary, randomly rekey activities
v.rekey(tracks={...}, activities={})  # rekey tracks according to dict, no change to activities
Args
tracks
- [dict]: If not None, use this dictionary to remap oldkey->newkey for tracks. If None, use random keys. If an empty dict, do not rekey tracks.
activities
- [dict]: If not None, use this dictionary to remap oldkey->newkey for activities. If None, use random keys. If an empty dict, do not rekey activities.
Returns
This object, with all track ID and activity ID rekeyed as specified. All actor IDs in activities will be updated.
Expand source code Browse git
def rekey(self, tracks=None, activities=None): """Change the track and activity IDs to randomly assigned UUIDs. Useful for cloning unique scenes. ```python v = vipy.video.RandomScene() v.rekey() # randomly rekey all track and activity ID v.rekey(tracks={...}) # rekey tracks (oldkey -> newkey) according to dictionary, randomly rekey activities v.rekey(tracks={...}, activities={}) # rekey tracks according to dict, no change to activities ``` Args: tracks [dict]: If not None, use this dictionary to remap oldkey->newkey for tracks. If None, use random keys. If empty dict, no change (do not rekey tracks) activities [dict]: If not None, use this dictionary to remap oldkey->newkey for activities. If None, use random keys. If empty dict, no change (do not rekey activities) Returns: This object, with all track ID and activity ID rekeyed as specified. All actor IDs in activities will be updated. """ assert activities is None or isinstance(activities, dict) and all([k in self.activities() for k in activities.keys()]) assert tracks is None or isinstance(tracks, dict) and all([k in self.tracks() for k in tracks.keys()]) d_old_to_new = {k:hex(int(uuid.uuid4().hex[0:8], 16))[2:] for (k,a) in self.activities().items()} if activities is None else activities self._activities = dict([(d_old_to_new[k], a.id(d_old_to_new[k])) if k in d_old_to_new else (k,a) for (k,a) in self.activities().items()]) d_old_to_new = {k:hex(int(uuid.uuid4().hex[0:8], 16))[2:] for (k,t) in self.tracks().items()} if tracks is None else tracks self._tracks = dict([(d_old_to_new[k], t.id(d_old_to_new[k])) if k in d_old_to_new else (k,t) for (k,t) in self.tracks().items()]) for (k,v) in d_old_to_new.items(): self.activitymap(lambda a: a.replaceid(k,v) ) return self
def replace(self, other, frame=None)
-
Replace this scene's tracks and activities with those from other; if frame is provided, keep only the tracks and activities active during that frame
Expand source code Browse git
def replace(self, other, frame=None): """Replace tracks and activities with other if activity/track is during frame""" assert isinstance(other, vipy.video.Scene) self.activities([a for a in other.activitylist() if frame is None or a.during(frame)]) self.tracks([t for t in other.tracklist() if frame is None or t.during(frame)]) return self
def rescale(self, s)
-
Spatially rescale the scene by a constant scale factor.
Args
s
- [float] Scale factor > 0 to isotropically scale the image.
Expand source code Browse git
def rescale(self, s): """Spatially rescale the scene by a constant scale factor. Args: s: [float] Scale factor > 0 to isotropically scale the image. """ assert s == 1 or not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first" self._tracks = {k:t.rescale(s) for (k,t) in self.tracks().items()} super().rescale(s) return self
def resize(self, rows=None, cols=None, width=None, height=None)
-
Resize the video to (rows, cols), preserving the aspect ratio if only rows or cols is provided
Expand source code Browse git
def resize(self, rows=None, cols=None, width=None, height=None): """Resize the video to (rows, cols), preserving the aspect ratio if only rows or cols is provided""" assert not (rows is not None and height is not None) # cannot be both assert not (cols is not None and width is not None) # cannot be both rows = rows if rows is not None else height cols = cols if cols is not None else width assert rows is not None or cols is not None, "Invalid input" (H,W) = self.shape() # yuck, need to get image dimensions before filter, manually set this prior to calling resize if known sy = rows / float(H) if rows is not None else cols / float(W) sx = cols / float(W) if cols is not None else rows / float(H) self._tracks = {k:t.scalex(sx) for (k,t) in self.tracks().items()} self._tracks = {k:t.scaley(sy) for (k,t) in self.tracks().items()} super().resize(rows=rows, cols=cols) return self
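A minimal sketch of how resizing propagates to track geometry; 'input.mp4' is a hypothetical annotated video:
```python
import vipy.video

v = vipy.video.Scene(filename='input.mp4')   # hypothetical annotated video
vs = v.clone().resize(rows=512)              # cols computed from the aspect ratio; boxes scaled to match
va = v.clone().resize(rows=512, cols=512)    # anisotropic resize; boxes scaled per axis
```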
def setactorid(self, id)
-
Alias for
Scene.actorid()
Expand source code Browse git
def setactorid(self, id): """Alias for `vipy.video.Scene.actorid`""" return self.actorid(id, fluent=True)
def show(self, outfile=None, verbose=True, fontsize=10, captionoffset=(0, 0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person': 'green', 'Vehicle': 'blue', 'Object': 'red'}, categories=None, nocaption=False, nocaption_withstring=[], figure=1, fps=None, timestamp=None, timestampcolor='black', timestampfacecolor='white', mutator=None)
-
Faster show using interactive image show for annotated videos. This can visualize videos before video rendering is complete, but it cannot guarantee frame rates. Large videos with complex scenes will slow this down and will render at lower frame rates.
Expand source code Browse git
def show(self, outfile=None, verbose=True, fontsize=10, captionoffset=(0,0), textfacecolor='white', textfacealpha=1.0, shortlabel=True, boxalpha=0.25, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, categories=None, nocaption=False, nocaption_withstring=[], figure=1, fps=None, timestamp=None, timestampcolor='black', timestampfacecolor='white', mutator=None): """Faster show using interactive image show for annotated videos. This can visualize videos before video rendering is complete, but it cannot guarantee frame rates. Large videos with complex scenes will slow this down and will render at lower frame rates.""" fps = min(fps, self.framerate()) if fps is not None else self.framerate() assert fps > 0, "Invalid display framerate" f_timestamp = (lambda k: '%s %d' % (vipy.util.clockstamp(), k)) if timestamp is True else timestamp f_mutator = mutator if mutator is not None else vipy.image.mutator_show_jointlabel() if not self.isdownloaded() and self.hasurl(): self.download() with Stopwatch() as sw: for (k,im) in enumerate(self.load() if self.isloaded() else self.stream()): time.sleep(max(0, (1.0/self.framerate())*int(np.ceil((self.framerate()/fps))))) f_mutator(im,k).show(categories=categories, figure=figure, nocaption=nocaption, nocaption_withstring=nocaption_withstring, fontsize=fontsize, boxalpha=boxalpha, d_category2color=d_category2color, captionoffset=captionoffset, textfacecolor=textfacecolor, textfacealpha=textfacealpha, timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestamp=f_timestamp(k) if timestamp is not None else None, shortlabel=shortlabel) if vipy.globals._user_hit_escape(): break vipy.show.close(figure) return self
def stabilize(self, padheightfrac=0.125, padwidthfrac=0.25, padheightpx=None, padwidthpx=None, gpu=None, outfile=None)
-
Background stabilization using flow-based stabilization, masking the foreground region.
- This will output a video with all frames aligned to the first frame, such that the background is static.
- This uses the flow based approach described in
Flow.stabilize()
Args
padheightfrac
- [float] The height padding (relative to video height) to be applied to output video to allow for vertical stabilization
padwidthfrac
- [float] The width padding (relative to video width) to be applied to output video to allow for horizontal stabilization
padheightpx
- [int] The height padding to be applied to the output video to allow for vertical stabilization. Overrides padheightfrac.
padwidthpx
- [int] The width padding to be applied to the output video to allow for horizontal stabilization. Overrides padwidthfrac.
gpu
- [int] The GPU index to use, if opencv has been compiled with GPU support (this is rare)
outfile
- [str] The output filename to store the stabilized video
Returns
A clone of this video with background pixels stabilized to the first frame.
Note
- If the camera pans outside the image rectangle, increase the height or width padding to make sure that the actor stays inside the stabilized image rectangle
- If there are moving actors in the scene, include bounding boxes for each; these boxes are ignored as keepouts in the flow stabilization
Expand source code Browse git
def stabilize(self, padheightfrac=0.125, padwidthfrac=0.25, padheightpx=None, padwidthpx=None, gpu=None, outfile=None): """Background stabilization using flow based stabilization masking foreground region. - This will output a video with all frames aligned to the first frame, such that the background is static. - This uses the flow based approach described in `vipy.flow.Flow.stabilize` Args: padheightfrac: [float] The height padding (relative to video height) to be applied to output video to allow for vertical stabilization padwidthfrac: [float] The width padding (relative to video width) to be applied to output video to allow for horizontal stabilization padheightpx: [int] The height padding to be applied to output video to allow for vertical stabilization. Overrides padheightfrac. padwidthpx: [int] The width padding to be applied to output video to allow for horizontal stabilization. Overrides padwidthfrac. gpu: [int] The GPU index to use, if opencv has been compiled with GPU support (this is rare) outfile: [str] The output filename to store the stabilized video Returns: A clone of this video with background pixels stabilized to the first frame. .. note:: - If the camera pans outside the image rectangle, increase the padding to make sure that the actor stays inside the stabilized image rectangle - If there are moving actors in the scene, include bounding boxes for each and these boxes are ignored as keepouts in the flow stabilization """ from vipy.flow import Flow # requires opencv return Flow(flowdim=256, gpu=gpu).stabilize(self.clone(), residual=True, strict=True, padheightfrac=padheightfrac, padwidthfrac=padwidthfrac, padheightpx=padheightpx, padwidthpx=padwidthpx, outfile=outfile)
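A minimal sketch, assuming opencv is installed (required by vipy.flow.Flow); 'input.mp4' and 'stabilized.mp4' are hypothetical paths:
```python
import vipy.video

v = vipy.video.Scene(filename='input.mp4')   # hypothetical video with actor bounding boxes
vs = v.stabilize(padwidthfrac=0.25, padheightfrac=0.125, outfile='stabilized.mp4')
```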
def startframe(self)
-
Expand source code Browse git
def startframe(self): return self._startframe
def thumbnail(self, outfile=None, frame=0, fontsize=10, nocaption=False, boxalpha=0.25, dpi=200, textfacecolor='white', textfacealpha=1.0)
-
Return the annotated frame at index frame, saving the annotation visualization to outfile if provided, otherwise return a vipy.image.Scene
Expand source code Browse git
def thumbnail(self, outfile=None, frame=0, fontsize=10, nocaption=False, boxalpha=0.25, dpi=200, textfacecolor='white', textfacealpha=1.0): """Return annotated frame=k of video, save annotation visualization to provided outfile if provided, otherwise return vipy.image.Scene""" im = self.frame(frame, img=self.preview(framenum=frame).array()) return im.savefig(outfile=outfile, fontsize=fontsize, nocaption=nocaption, boxalpha=boxalpha, dpi=dpi, textfacecolor=textfacecolor, textfacealpha=textfacealpha) if outfile is not None else im
def track(self, id)
-
Expand source code Browse git
def track(self, id): return self.tracks(id=id)
def trackbox(self, dilate=1.0)
-
The trackbox is the union of all track bounding boxes in the video, or None if there are no tracks
Args
dilate
- [float] A dilation factor to apply to the trackbox before returning.
See
BoundingBox.dilate()
Returns
A
BoundingBox
which is the union of all boxes in the track (or None if no boxes exist)
Expand source code Browse git
def trackbox(self, dilate=1.0): """The trackbox is the union of all track bounding boxes in the video, or None if there are no tracks Args: dilate: [float] A dilation factor to apply to the trackbox before returning. See `vipy.geometry.BoundingBox.dilate` Returns: A `vipy.geometry.BoundingBox` which is the union of all boxes in the track (or None if no boxes exist) """ boxes = [t.clone().boundingbox() for t in self.tracklist()] boxes = [bb.dilate(dilate) for bb in boxes if bb is not None] return boxes[0].union(boxes[1:]) if len(boxes) > 0 else None
def trackclip(self)
-
Split the scene into k separate scenes, one for each track. Each scene starts and ends when the track starts and ends
Expand source code Browse git
def trackclip(self): """Split the scene into k separate scenes, one for each track. Each scene starts and ends when the track starts and ends""" return [t.setattribute('_instance_id', '%s_%d_trackclip' % (t.videoid(), k)).clip(t.track(t.actorid()).startframe(), t.track(t.actorid()).endframe()) for (k,t) in enumerate(self.tracksplit())]
def trackcrop(self, dilate=1.0, maxsquare=False, zeropad=True)
-
Return the trackcrop() of the scene which is the crop of the video using the
Scene.trackbox()
.
Args
zeropad
- [bool] If True, zero pad the crop if it extends outside the image rectangle, otherwise return only valid pixels inside the image rectangle
maxsquare
- [bool] If True, make the bounding box the maximum square before cropping
dilate
- [float] The dilation factor to apply to the trackbox prior to cropping
Returns
A
Scene
object from cropping the video using the trackbox. If there are no tracks, return None.
Expand source code Browse git
def trackcrop(self, dilate=1.0, maxsquare=False, zeropad=True): """Return the trackcrop() of the scene which is the crop of the video using the `vipy.video.Scene.trackbox`. Args: zeropad: [bool] If True, the zero pad the crop if it is outside the image rectangle, otherwise return only valid pixels inside the image rectangle maxsquare: [bool] If True, make the bounding box the maximum square before cropping dilate: [float] The dilation factor to apply to the trackbox prior to cropping Returns: A `vipy.video.Scene` object from cropping the video using the trackbox. If there are no tracks, return None. """ bb = self.trackbox(dilate) # may be None if trackbox is degenerate return self.crop(bb.maxsquareif(maxsquare), zeropad=zeropad) if bb is not None else None
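A minimal sketch: crop the video to the union of all track boxes, dilated by 10% and squared up for fixed-size downstream encoding (the parameter values are illustrative):
```python
import vipy.video

v = vipy.video.RandomScene()                             # synthetic annotated test scene
vc = v.trackcrop(dilate=1.1, maxsquare=True, zeropad=True)   # None if the scene has no tracks
```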
def trackfilter(self, f, activitytrack=True)
-
Apply lambda function f to each track and keep the track if f(t) is True.
Args
activitytrack
- [bool] If true, also remove track assignments from activities, which may result in activities with no tracks
f
- [lambda] The lambda function to apply to each track t, and if f(t) returns True, then keep the track
Returns
self, with tracks removed in-place
Note: Applying track filter with activitytrack=True may result in activities with no associated tracks. You should follow up with self.activityfilter(lambda a: len(a.trackids()) > 0).
Expand source code Browse git
def trackfilter(self, f, activitytrack=True): """Apply lambda function f to each object and keep if filter is True. Args: activitytrack: [bool] If true, remove track assignment from activities also, may result in activities with no tracks f: [lambda] The lambda function to apply to each track t, and if f(t) returns True, then keep the track Returns: self, with tracks removed in-place .. note:: Applying track filter with activitytrack=True may result in activities with no associated tracks. You should follow up with self.activityfilter(lambda a: len(a.trackids()) > 0). """ assert callable(f) self._tracks = {k:t for (k,t) in self.tracks().items() if f(t) == True} if activitytrack: self.activitymap(lambda a: a.trackfilter(lambda ti: ti in self._tracks)) # remove track association in activities #if any([len(a.tracks()) == 0 for a in self.activitylist()]): # warnings.warn('trackfilter(..., activitytrack=True) removed tracks which returned at least one degenerate activity with no tracks') return self
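A minimal sketch of the note above: filter tracks, then drop the activities left with no tracks (the length threshold is illustrative):
```python
import vipy.video

v = vipy.video.RandomScene()                         # synthetic annotated test scene
v.trackfilter(lambda t: len(t) > 10)                 # keep only sufficiently long tracks
v.activityfilter(lambda a: len(a.trackids()) > 0)    # then remove orphaned activities
```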
def trackidx(self, idx)
-
Return the track at the specified index in the tracklist
Expand source code Browse git
def trackidx(self, idx): """Return the track at the specified index in the tracklist""" return self.tracklist()[idx]
def trackindex(self, id)
-
Return the index in the tracklist of the track with the provided track id
Expand source code Browse git
def trackindex(self, id): """Return the index in the tracklist of the track with the provided track id""" assert id in self.tracks() return [t.id() for t in self.tracklist()].index(id)
def tracklist(self)
-
Expand source code Browse git
def tracklist(self): return list(self.tracks().values()) # triggers shallow copy
def trackmap(self, f, strict=True)
-
Apply lambda function f to each track
- strict=True: enforce that the lambda function must return non-degenerate Track() objects
Expand source code Browse git
def trackmap(self, f, strict=True): """Apply lambda function f to each track -strict=True: enforce that lambda function must return non-degenerate Track() objects """ assert callable(f) self._tracks = {k:f(t) for (k,t) in self.tracks().items()} assert all([isinstance(t, vipy.object.Track) and (strict is False or not t.isdegenerate()) for (tk,t) in self.tracks().items()]), "Lambda function must return non-degenerate vipy.object.Track()" return self
def tracks(self, tracks=None, id=None)
-
Return a mutable dictionary of tracks.
Args
tracks
- [dict] If provided, replace the track dictionary with the provided track dictionary, and return self
id
- [str] If provided, return just the track associated with the provided track id
Returns
This object if tracks is not None, otherwise the requested track (if id is not None) or the track dictionary (if both are None)
Expand source code Browse git
def tracks(self, tracks=None, id=None): """Return mutable dictionary of tracks, Args: tracks [dict]: If provided, replace track dictionary with provided track dictionary, and return self id [str]: If provided, return just the track associated with the provided track id Returns: This object if tracks!=None, otherwise the requested track (if id!=None) or trackdict (tracks=None) """ if isinstance(self._tracks, tuple): self._tracks = {t.id():t for t in [vipy.object.Track.from_json(json.loads(s)) for s in self._tracks]} # on-demand unpack (efficient garbage collection for large list of objects) if tracks is None and id is None: return self._tracks # mutable dict elif id is not None: return self._tracks[id] elif isinstance(tracks, dict): assert all([isinstance(t, vipy.object.Track) and k == t.id() for (k,t) in tracks.items()]), "Invalid input - Must be dictionary of vipy.object.Track" self._tracks = tracks.copy() # shallow copy return self else: assert all([isinstance(t, vipy.object.Track) for t in tolist(tracks)]), "Invalid input - Must be vipy.object.Track or list of vipy.object.Track" self._tracks = {t.id():t for t in tolist(tracks)} # insertion order preserved (python >=3.6) return self
def tracksplit(self)
-
Split the scene into k separate scenes, one for each track. Each scene starts at frame 0 and is a shallow copy of self containing exactly one track.
- This is useful for visualization by breaking a scene into a list of scenes that contain only one track.
- The attribute '_trackindex' is set in the attributes dictionary to provide provenance for the track relative to the source video
Notes: Use clone() to create a deep copy if needed.
Expand source code Browse git
def tracksplit(self): """Split the scene into k separate scenes, one for each track. Each scene starts at frame 0 and is a shallow copy of self containing exactly one track. - This is useful for visualization by breaking a scene into a list of scenes that contain only one track. - The attribute '_trackindex' is set in the attributes dictionary to provide provenance for the track relative to the source video .. notes:: Use clone() to create a deep copy if needed. """ return [self.clone(shallow=True).setattribute('_trackindex', k).tracks(t).activityfilter(lambda a: a.hastrack(tk)) for (k,(tk,t)) in enumerate(self.tracks().items())]
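A minimal sketch of per-actor iteration using the '_trackindex' provenance attribute:
```python
import vipy.video

for s in vipy.video.RandomScene().tracksplit():
    assert len(s.tracklist()) == 1                   # exactly one track per scene
    print(s.getattribute('_trackindex'), s.tracklist()[0].category())
```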
def union(self, other, temporal_iou_threshold=0.5, spatial_iou_threshold=0.6, strict=True, overlap='average', percentilecover=0.8, percentilesamples=100, activity=True, track=True)
-
Compute the union two scenes as the set of unique activities and tracks.
A pair of activities or tracks is non-unique if they overlap spatially and temporally by a given IoU threshold, in which case the overlapping pair is merged. Tracks are merged when the mean IoU over the overlapping segment of two tracks with the same category exceeds spatial_iou_threshold. Activities are merged when the temporal IoU of two activities of the same class exceeds temporal_iou_threshold.
Args
other
- Scene or list of scenes for union. Other may be a clip of self at a different framerate, spatial isotropic scale, or clip offset
spatial_iou_threshold
- The intersection over union threshold for the mean of the two segments of an overlapping track, Disable by setting to 1.0
temporal_iou_threshold
- The intersection over union threshold for a temporal bounding box for a pair of activities to be declared duplicates. Disable by setting to 1.0
strict
- Require both scenes to share the same underlying video filename
overlap
- ['average', 'replace', 'keep']: The track merge policy
- average: merge two tracks by averaging the boxes if overlapping
- replace: merge two tracks by replacing overlapping boxes with other (discard self)
- keep: merge two tracks by keeping overlapping boxes from self (discard other)
percentilecover
- [0,1]: When determining the assignment of two tracks, compute the percentilecover of two tracks by ranking the cover in the overlapping segment and computing the mean of the top-k assignments, where k=len(segment)*percentilecover.
percentilesamples
- [>1]: the number of samples along the overlapping segment for computing percentile cover
activity
- [bool]: union() of activities only
track
- [bool]: union() of tracks only
Returns
Updates this scene to include the non-overlapping activities from other. By default, it takes the strict union of all activities and tracks.
Note
- This is useful for merging scenes computed using a lower resolution/framerate/clipped object or activity detector without running the detector on the high-res scene
- This function will preserve the invariance for v == v.clear().union(v.rescale(0.5).framerate(5).activityclip()), to within the quantization error of framerate() downsampling.
- percentilecover is a robust method of track assignment when the boxes for two tracks (e.g. ground truth and detections) may deform due to occlusion.
Expand source code Browse git
def union(self, other, temporal_iou_threshold=0.5, spatial_iou_threshold=0.6, strict=True, overlap='average', percentilecover=0.8, percentilesamples=100, activity=True, track=True): """Compute the union of two scenes as the set of unique activities and tracks. A pair of activities or tracks are non-unique if they overlap spatially and temporally by a given IoU threshold. Merge overlapping tracks. Tracks are merged by considering the mean IoU at the overlapping segment of two tracks with the same category greater than the provided spatial_iou_threshold threshold. Activities are merged by considering the temporal IoU of the activities of the same class greater than the provided temporal_iou_threshold threshold Args: other: Scene or list of scenes for union. Other may be a clip of self at a different framerate, spatial isotropic scale, clip offset spatial_iou_threshold: The intersection over union threshold for the mean of the two segments of an overlapping track. Disable by setting to 1.0 temporal_iou_threshold: The intersection over union threshold for a temporal bounding box for a pair of activities to be declared duplicates. Disable by setting to 1.0 strict: Require both scenes to share the same underlying video filename overlap=['average', 'replace', 'keep'] - average: Merge two tracks by averaging the boxes if overlapping - replace: merge two tracks by replacing overlapping boxes with other (discard self) - keep: merge two tracks by keeping overlapping boxes from self (discard other) percentilecover: [0,1]: When determining the assignment of two tracks, compute the percentilecover of two tracks by ranking the cover in the overlapping segment and computing the mean of the top-k assignments, where k=len(segment)*percentilecover. percentilesamples: [>1]: the number of samples along the overlapping segment for computing percentile cover activity: [bool]: union() of activities only track: [bool]: union() of tracks only Returns: Updates this scene to include the non-overlapping activities from other. By default, it takes the strict union of all activities and tracks. .. note:: - This is useful for merging scenes computed using a lower resolution/framerate/clipped object or activity detector without running the detector on the high-res scene - This function will preserve the invariance for v == v.clear().union(v.rescale(0.5).framerate(5).activityclip()), to within the quantization error of framerate() downsampling. - percentilecover is a robust method of track assignment when the boxes for two tracks (e.g. ground truth and detections) may deform due to occlusion. """ assert overlap in ['average', 'replace', 'keep'], "Invalid input - 'overlap' must be in [average, replace, keep]" assert spatial_iou_threshold >= 0 and spatial_iou_threshold <= 1, "invalid spatial_iou_threshold, must be between [0,1]" assert temporal_iou_threshold >= 0 and temporal_iou_threshold <= 1, "invalid temporal_iou_threshold, must be between [0,1]" assert percentilesamples >= 1, "invalid samples, must be >= 1" if not activity and not track: return self # nothing to do sc = self.clone() # do not change self yet, make a copy then merge at the end for o in tolist(other): assert isinstance(o, Scene), "Invalid input - must be vipy.video.Scene() object and not type=%s" % str(type(o)) if strict: assert sc.filename() == o.filename(), "Invalid input - Scenes must have the same underlying video. Disable this with strict=False." oc = o.clone() # do not change other, make a copy # Key collision? if len(set(sc.tracks().keys()).intersection(set(oc.tracks().keys()))) > 0: print('[vipy.video.union]: track key collision - Rekeying other... Use other.rekey() to suppress this warning.') oc.rekey() if len(set(sc.activities().keys()).intersection(set(oc.activities().keys()))) > 0: print('[vipy.video.union]: activity key collision - Rekeying other... Use other.rekey() to suppress this warning.') oc.rekey() # Similarity transform? Other may differ from self by a temporal scale (framerate), temporal translation (clip) or spatial isotropic scale (rescale) assert np.isclose(sc.aspect_ratio(), oc.aspect_ratio(), atol=1E-2), "Invalid input - Scenes must have the same aspect ratio" if sc.width() != oc.width(): oc = oc.rescale(sc.width() / oc.width()) # match spatial scale if not np.isclose(sc.framerate(), oc.framerate(), atol=1E-3): oc = oc.framerate(sc.framerate()) # match temporal scale (video in oc will not match, only annotations) if sc.startframe() != oc.startframe(): dt = (oc.startframe() if oc.startframe() is not None else 0) - (sc.startframe() if sc.startframe() is not None else 0) oc = oc.trackmap(lambda t: t.offset(dt=dt)).activitymap(lambda a: a.offset(dt=dt)) # match temporal translation of tracks and activities oc = oc.trackfilter(lambda t: ((not t.isdegenerate()) and len(t)>0), activitytrack=False) # Merge other tracks into selfclone: one-to-many mapping from self to other merged = {} # dictionary mapping trackid in other to the trackid in self, each track in other can be merged at most once for ti in sorted(sc.tracklist(), key=lambda t: len(t), reverse=True): # longest to shortest for tj in sorted(oc.tracklist(), key=lambda t: len(t), reverse=True): if ti.category() == tj.category() and (tj.id() not in merged) and tj.segment_percentilecover(sc.track(ti.id()), percentile=percentilecover, samples=percentilesamples) > spatial_iou_threshold: # mean framewise overlap during overlapping segment of two tracks sc.tracks()[ti.id()] = sc.track(ti.id()).union(tj, overlap=overlap) # merge duplicate/fragmented tracks from other into self, union() returns clone merged[tj.id()] = ti.id() print('[vipy.video.union]: merging track "%s"(id=%s) + "%s"(id=%s) for scene "%s"' % (str(ti), str(ti.id()), str(tj), str(tj.id()), str(sc))) oc.trackfilter(lambda t: t.id() not in merged, activitytrack=False) # remove duplicate other track for final union # Merge other activities into selfclone: one-to-one mapping for (i,j) in merged.items(): # i=id of other, j=id of self oc.activitymap(lambda a: a.replaceid(i, j) if a.hastrack(i) else a) # update track IDs referenced in activities for merged tracks for (i,ai) in sc.activities().items(): for (j,aj) in oc.activities().items(): if ai.category() == aj.category() and set(ai.trackids()) == set(aj.trackids()) and ai.temporal_iou(aj) > temporal_iou_threshold: oc.activityfilter(lambda a: a.id() != j) # remove duplicate activity from final union oc.activityfilter(lambda a: len(a.tracks())>0) # remove empty activities not merged # Union sc.tracks().update(oc.tracks()) sc.activities().update(oc.activities()) # Final union of unique tracks/activities if track: self.tracks(sc.tracklist()) # union of tracks only if activity: self.activities(sc.activitylist()) # union of activities only: may reference tracks not in self if track=False return self
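A minimal sketch of the merge workflow described in the note above; 'input.mp4' is a hypothetical annotated scene, and the similarity transform between the two copies is handled internally:
```python
import vipy.video

v = vipy.video.Scene(filename='input.mp4')   # hypothetical annotated scene
d = v.clone().rescale(0.5).framerate(5)      # e.g. annotate this smaller, slower copy with a detector
v.union(d, spatial_iou_threshold=0.6, temporal_iou_threshold=0.5)
```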
def zeromask(self)
-
Alias for fgmask
Expand source code Browse git
def zeromask(self): """Alias for fgmask""" return self.fgmask()
def zeropad(self, padwidth, padheight)
-
Zero pad the video with padwidth columns before and after, and padheight rows before and after. Update tracks accordingly.
Expand source code Browse git
def zeropad(self, padwidth, padheight): """Zero pad the video with padwidth columns before and after, and padheight rows before and after Update tracks accordingly. """ assert isinstance(padwidth, int) and isinstance(padheight, int) super().zeropad(padwidth, padheight) self._tracks = {k:t.offset(dx=padwidth, dy=padheight) for (k,t) in self.tracks().items()} return self
Inherited members
VideoCategory:
abspath
array
aspect_ratio
bias
bytes
canload
centercrop
centersquare
channels
channelshape
clearattributes
cliprange
clone
colorspace
commandline
concatenate
cropeven
dict
download
downloadif
duration
duration_in_frames
duration_in_frames_of_videofile
duration_in_seconds_of_videofile
fetch
ffplay
filename
filesize
fliplr
flipud
flush
flush_and_return
frame_meta
framerate_of_videofile
frames
fromarray
fromdirectory
fromframes
gain
getattribute
gif
hasattribute
hasfilename
hasurl
height
iframes
iscolor
isdownloaded
isgrayscale
isloadable
isloaded
issquare
load
map
maxdim
maxmatte
maxsquare
metadata
metaframe
mindim
minsquare
mutable
normalize
nourl
numpy
pad
pkl
pklif
play
preview
print
printif
probe
probeshape
randomcrop
relpath
rename
resolution_of_videofile
restore
returns
rot90ccw
rot90cw
sanitize
saveas
savetemp
savetmp
shape
speed
store
stream
take
tocache
tonumpy
torch
unstore
url
videoid
webp
width
zeropadlike
class Stream (v, queuesize, write, overwrite, bitrate=None, buffered=False, bufsize=256, rebuffered=False)
-
vipy.video.Stream class.
This class is the primary mechanism for streaming frames and clips from long videos or live video streams.
- The stream is constructed from a shared underlying video in self._video.
- As the shared video is updated with annotations, the stream can generate frames and clips that contain these annotations
- The shared video allows for multiple concurrent iterators all sourced from the same video, iterating over different frames, clips and rates
- The iterator leverages a pipe to FFMPEG, reading numpy frames from the video filter chain.
- The pipe is written from a thread which is dedicated to reading frames from ffmpeg
- Each numpy frame is added to a queue, with a null terminator when end of stream is reached
- The iterator then reads from the queue, and returns annotated frames
This iterator can also be used as a buffered stream. Buffered streams have a primary iterator which saves a fixed stream buffer of frames so that subsequent iterators can pull temporally aligned frames. This is useful to avoid having multiple FFMPEG pipes open simultaneously, and can allow for synchronized access to live video streams without timestamping.
- The primary iterator is the first iterator over the video with stream(buffered=True)
- The primary iterator creates a private attribute self._video.attributes['__stream_buffer'] which caches frames
- The stream buffer saves numpy arrays from the iterator with a fixed buffer length (number of frames)
- The secondary iterator (e.g. any iterator that accesses the video after the primary iterator is initially created) will read from the stream buffer
- All iterators share the underlying self._video object in the stream so that if the video annotations are updated by an iterator, the annotated frames are accessible in the iterators
- The secondary iterators are synchronized to the stream buffer that is read by the primary iterator. This is useful for synchronizing streams for live camera streams without absolute timestamps.
- There can be an unlimited number of secondary iterators, without incurring a penalty on frame access
This iterator can iterate over clips, frames or batches.
- A clip is a sequence of frames such that each clip is separated by a fixed number of frames.
- Clips are useful for temporal encoding of short atomic activities
- A batch is a sequence of n frames with a stride of n.
- A batch is useful for iterating over groups of frames that are operated in parallel on a GPU
for (im1, im2, v3) in zip(v.stream(buffered=True), v.stream(buffered=True).frame(delay=30), v.stream(buffered=True).clip(n=16, m=1)):
    # im1: `vipy.image.Scene` at frame index k
    # im2: `vipy.image.Scene` at frame index k-30
    # v3: `vipy.video.Scene` at frame range [k, k-16]
Note
- This is designed to be accessed as
Video.stream()
and not accessed as a standalone class.
Expand source code Browse git
class Stream(object): """vipy.video.Stream class. This class is the primary mechanism for streaming frames and clips from long videos or live video streams. - The stream is constructed from a shared underlying video in self._video. - As the shared video is updated with annotations, the stream can generate frames and clips that contain these annotations - The shared video allows for multiple concurrent iterators all sourced from the same video, iterating over different frames, clips and rates - The iterator leverages a pipe to FFMPEG, reading numpy frames from the video filter chain. - The pipe is written from a thread which is dedicated to reading frames from ffmpeg - Each numpy frame is added to a queue, with a null terminator when end of stream is reached - The iterator then reads from the queue, and returns annotated frames This iterator can also be used as a buffered stream. Buffered streams have a primary iterator which saves a fixed stream buffer of frames so that subsequent iterators can pull temporally aligned frames. This is useful to avoid having multiple FFMPEG pipes open simultaneously, and can allow for synchronized access to live video streams without timestamping. - The primary iterator is the first iterator over the video with stream(buffered=True) - The primary iterator creates a private attribute self._video.attributes['__stream_buffer'] which caches frames - The stream buffer saves numpy arrays from the iterator with a fixed buffer length (number of frames) - The secondary iterator (e.g. any iterator that accesses the video after the primary iterator is initially created) will read from the stream buffer - All iterators share the underlying self._video object in the stream so that if the video annotations are updated by an iterator, the annotated frames are accessible in the iterators - The secondary iterators are synchronized to the stream buffer that is read by the primary iterator. This is useful for synchronizing streams for live camera streams without absolute timestamps. - There can be an unlimited number of secondary iterators, without incurring a penalty on frame access This iterator can iterate over clips, frames or batches. - A clip is a sequence of frames such that each clip is separated by a fixed number of frames. - Clips are useful for temporal encoding of short atomic activities - A batch is a sequence of n frames with a stride of n. - A batch is useful for iterating over groups of frames that are operated in parallel on a GPU ```python for (im1, im2, v3) in zip(v.stream(buffered=True), v.stream(buffered=True).frame(delay=30), v.stream(buffered=True).clip(n=16,m=1)): # im1: `vipy.image.Scene` at frame index k # im2: `vipy.image.Scene` at frame index k-30 # v3: `vipy.video.Scene` at frame range [k, k-16] ``` .. note:: - This is designed to be accessed as `vipy.video.Video.stream` and not accessed as a standalone class. """ def __init__(self, v, queuesize, write, overwrite, bitrate=None, buffered=False, bufsize=256, rebuffered=False): self._video = v # do not clone self._write_pipe = None self._vcodec = 'libx264' self._bitrate = bitrate # e.g. '2000k', recommended settings for live streaming self._framerate = self._video.framerate() self._outfile = self._video.filename() self._write = write or overwrite assert self._write is False or (overwrite is True or not os.path.exists(self._outfile)), "Output file '%s' exists - Writable stream cannot overwrite existing video file unless overwrite=True" % self._outfile if overwrite and os.path.exists(self._outfile): os.remove(self._outfile) self._shape = self._video.shape() if (not self._write) or (not self._video.isstreaming() and self._video.canload()) else None # shape for write can be defined by first frame assert (write is True or overwrite is True) or self._shape is not None, "Invalid video '%s'" % (str(v)) self._queuesize = queuesize self._bufsize = bufsize self._buffered = buffered self._is_stream_buffer_owner = False assert self._bufsize >= 1 if rebuffered: self._video.attributes.pop("__stream_buffer", None) # force reinitialization def __enter__(self): """Write pipe context manager""" assert self._write, "invalid parameters for write only context manager" if self._shape is not None: (height, width) = self._shape outfile = self._outfile if self._outfile is not None else self._url # may be youtube/twitch live stream outrate = 30 if vipy.util.isRTMPurl(outfile) else self._video.framerate() fiv = (ffmpeg.input('pipe:', format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(width, height), r=self._video.framerate()) .filter('pad', 'ceil(iw/2)*2', 'ceil(ih/2)*2')) fi = ffmpeg.concat(fiv.filter('fps', fps=30, round='up'), ffmpeg.input('anullsrc', f='lavfi'), v=1, a=1) if isRTMPurl(outfile) else fiv # empty audio for youtube-live kwargs = {'video_bitrate':self._bitrate} if self._bitrate is not None else {} fo = (fi.output(filename=self._outfile if self._outfile is not None else self._url, pix_fmt='yuv420p', vcodec=self._vcodec, f='flv' if vipy.util.isRTMPurl(outfile) else vipy.util.fileext(outfile, withdot=False), g=2*outrate, **kwargs) .overwrite_output() .global_args('-cpuflags', '0', '-loglevel', 'quiet' if not vipy.globals.isdebug() else 'debug')) self._write_pipe = fo.run_async(pipe_stdin=True) self._writeindex = 0 return self def __exit__(self, type=None, value=None, tb=None): """Write pipe context manager .. note:: This is triggered on ctrl-c as the last step for cleanup """ if self._write_pipe is not None: self._write_pipe.stdin.close() self._write_pipe.wait() del self._write_pipe self._write_pipe = None if type is not None: raise return self def __call__(self, im): """alias for write()""" return self.write(im) def __reduce__(self): """When pickling, ignore all open pipes, restore as read-only stream. User is responsible for restoring the stream after unpickling.""" return (self.__class__, (self._video, self._queuesize, False, False, self._bitrate, self._buffered, self._bufsize, True)) def _read_pipe(self): if not self._video.isloaded(): p = self._video._ffmpeg.output('pipe:', format='rawvideo', pix_fmt='rgb24').global_args('-nostdin', '-loglevel', 'debug' if vipy.globals.isdebug() else 'quiet').run_async(pipe_stdout=True, pipe_stderr=True) assert p is not None, "Invalid read pipe" p.poll() return p else: return None def framerate(self): return self._video.framerate() def __iter__(self): """Stream individual video frames.""" try: if self._video.isloaded(): # For loaded video, just use the existing iterator for in-memory video for k in range(len(self._video)): yield self._video[k] else: # First stream iterator: read from video and store in stream buffer for all other iterators to access if self._buffered and not self._video.hasattribute('__stream_buffer'): self._video.attributes['__stream_buffer'] = {} # for synchronized frames with secondary iterator self._is_stream_buffer_owner = True # track which iterator created the stream buffer for cleanup in 'finally' # Video pipe thread: # - Initialized only if not buffered (the default) or if primary iterator # - read numpy frames from the ffmpeg filter chain via a pipe # - store the resulting frames in a queue with a null terminated frame when the stream ends # - Threading is useful here because there is often time to switch when waiting on GPU I/O (t,q,p) = (None, None,None) if not self._buffered or self._is_stream_buffer_owner: p = self._read_pipe() q = queue.Queue(self._queuesize) (h, w) = self._shape def _f_threadloop(pipe, queue, height, width, event): assert pipe is not None, "Invalid pipe" assert queue is not None, "invalid queue" f = 0 while True: in_bytes = pipe.stdout.read(height * width * 3) if not in_bytes: queue.put(None) pipe.poll() pipe.wait() queue.join() event.wait() if pipe.returncode != 0: raise ValueError('Stream iterator exited with returncode %d' % (pipe.returncode)) break else: queue.put(np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3])) e = threading.Event() t = threading.Thread(target=_f_threadloop, args=(p, q, h, w, e), daemon=True, name='vipy.video.Stream.__iter__') t.start() # Stream iterator: # -read frames from the thread queue and write to the private stream buffer stored as a video attribute with a frame index # -Frames are also yielded for the primary iterator # -The stream buffer is a dictionary that is n frames long, and if the newest frame from the pipe is frame k, the oldest frame is k-n which is yielded first # -If the stream is unbuffered, just read from the queue directly and yield the numpy frame b = self._video.attributes['__stream_buffer'] if self._buffered else None # buffer (if requested) k = 0 # current frame while True: if not self._buffered or self._is_stream_buffer_owner: # Primary iterator: read from thread queue if b is None: (f, img) = (k, q.get()) # unbuffered: to yield latest from thread queue directly q.task_done() k += 1 else: b[k] = q.get() # add to stream buffer, cache frames only, annotations are added synchronously on yield q.task_done() (f, img) = (k, b[k]) # primary buffer: yield current frame from pipe k += 1 else: # Secondary iterator: read from stream buffer while k not in b: # "Event" wait: wait for primary iterator to start up and fill buffer, after it starts filling, this sleep should be unnecessary time.sleep(0.001) (f, img) = (k, b[k]) # secondary buffers: yield from stream buffer k += 1 if img is not None: yield self._video.frame(f, img) # yield a vipy.image.Scene object with annotations at frame f, using the latest annotation from the shared video object and shallow copy of img if b is not None and self._is_stream_buffer_owner: if len(b) > self._bufsize: del b[min(b.keys())] # remove oldest frame from stream buffer else: if not self._buffered or self._is_stream_buffer_owner: e.set() break # termination if t is not None: del p,q,t except: raise finally: if self._is_stream_buffer_owner: self._video.delattribute('__stream_buffer') # cleanup, or force a reinitialization by passing the
rebuffered=True to the primary iterator def __getitem__(self, k): """Retrieve individual frame index - this is inefficient, use __iter__ instead""" return self._video.preview(frame=k) # this is inefficient def write(self, im, flush=False): """Write individual frames to write stream""" assert isinstance(im, vipy.image.Image) if self._shape is None: self._shape = im.shape() assert im.channels() == 3, "RGB frames required" self.__enter__() assert self._write_pipe is not None, "Write stream cannot be initialized" assert im.shape() == self._shape, "Shape cannot change during writing" self._write_pipe.stdin.write(im.array().astype(np.uint8).tobytes()) if flush: self._write_pipe.stdin.flush() # do we need this? if isinstance(im, vipy.image.Scene) and len(im.objects()) > 0 and isinstance(self._video, vipy.video.Scene): for obj in im.objects(): self._video.add(obj, frame=self._writeindex, rangecheck=False) self._writeindex += 1 # assumes that the source image is at the appropriate frame rate for this video def clip(self, n, m=1, continuous=False, tracks=True, activities=True, delay=0, ragged=False): """Stream clips of length n such that the yielded video clip contains frame(0+delay) to frame(n+delay), and next contains frame(m+delay) to frame(n+m+delay). Usage examples: ```python for vc in v.stream().clip(n=16, m=2): # yields video vc with frames [0,16] from v # then video vc with frames [2,18] from v # ... finally video with frames [len(v)-n-1, len(v)-1] ``` Introducing a delay so that the clips start at a temporal offset from v ```python for vc in v.stream().clip(n=8, m=3, delay=1): # yields video vc with frames [1,9] # then video vc with frames [4,12] ... ``` Args: n: [int] the length of the clip in frames m: [int] the stride between clips in frames delay: [int] The temporal delay in frames for the clip, must be less than n and >= 0 continuous: [bool] if true, then yield None for the sequential frames not aligned with a stride so that a clip is yielded on every frame activities: [bool] if false, then activities from the source video are not copied into the clip tracks: [bool] if false, then tracks from the source video are not copied into the clip Returns: An iterator that yields `vipy.video.Video` objects each of length n with startframe += m, starting at frame=delay, such that each video contains the tracks and activities (if requested) for this clip sourced from the shared stream video. .. note:: This iterator runs in a thread to help speed up fetching of frames for GPU I/Oe bound operations """ assert isinstance(n, int) and n>0, "Clip length must be a positive integer" assert isinstance(m, int) and m>0, "Clip stride must be a positive integer" assert isinstance(delay, int) and delay >= 0 and delay < n, "Clip delay must be a positive integer less than n" assert not self._buffered or 3*n < self._bufsize, "increase buffered stream size (bufsize) from %d to >%d" % (self._bufsize, 3*n) def _f_threadloop(v, streamiter, queue, event, ragged, m, n): (frames, newframes) = ([], []) for (k,im) in enumerate(streamiter()): newframes.append(im) if len(newframes) >= m and len(frames)+len(newframes) >= n: # Use frameindex+1 so that we include (0,1), (1,2), (2,3), ... for n=2, m=1 # The delay shifts the clip +delay frames (1,2,3), (3,4,5), ... 
for n=3, m=2, delay=1 frames.extend(newframes) (frames, newframes) = (frames[-n:], []) queue.put( (v.clear().clone(shallow=True).fromframes(frames), k) ) # fromframes() triggers array copy of frames elif continuous: queue.put( (None, k) ) if ragged and len(newframes) > 0: queue.put( (v.clear().clone(shallow=True).fromframes(newframes), k) ) # fromframes() triggers array copy of newframes queue.put( (None, None) ) event.wait() vc = self._video.clone(flushfilter=True).clear().nourl().nofilename() q = queue.Queue(3) # warning: if this queuesize*n > buffersize, then there can be a deadlock e = threading.Event() t = threading.Thread(target=_f_threadloop, args=(vc, self.__iter__, q, e, ragged, m, n), daemon=True, name='vipy.video.Stream.clip') t.start() f_copy_annotations = lambda v, k, n: (v.activities([a.clone().offset(-(k-(n-1))).truncate(0,n-1) for (ak,a) in self._video.activities().items() if a.during_interval(k-(n-1), k, inclusive=False)] if activities else []) .tracks([t.clone(k-(n-1), k).offset(-(k-(n-1))).truncate(0,n-1) for (tk,t) in self._video.tracks().items() if t.during_interval(k-(n-1), k)] if tracks else []) if (v is not None and isinstance(v, vipy.video.Scene)) else v) try: while True: # The queue can be filled with more expensive copies and clones to speed up iteration when waiting for GPU I/O (v, k) = q.get() q.task_done() if k is None: e.set() break else: # This copy must be done sychronized at frame k with the current state of the annotations in the shared self._video yield f_copy_annotations(v, k, len(v)) if v is not None else None finally: e.set() # thread cleanup on early exit del q,t def batch(self, n): """Stream batches of length n such that each batch contains frames [0, n-1], [n, 2n-1], ... Last batch will be ragged. The primary use case for batch() is to provide a mechanism for parallel batch processing on a GPU. ```python for im_gpu in myfunc(vi.stream().batch(16))): print(im_gpu) def myfunc(gen): for vb in gen: # process the batch vb (length n) in parallel by encoding on a GPU with batchsize=n for im in f_gpu(vb): yield im_gpu: ``` This will then yield the GPU batched processed image im_gpu. """ return self.clip(n=n, m=n, continuous=False, ragged=True) def frame(self, delay=0): """Stream individual frames of video with negative offset n frames to the stream head. If delay=30, this will return a frame 30 frames ago""" assert isinstance(delay, int) and delay >= 0, "Frame delay must be positive integer" n = -delay frames = [] i = 0 for (k,im) in enumerate(self): frames.append( (k,im) ) (kout, imout) = frames[0] frames.pop(0) if len(frames) > abs(n) else None i = k yield self._video.frame(kout, imout.array()) if len(frames) == delay else None # refetch for track interpolation
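As a concrete illustration of the buffered-stream design described above, the following is a minimal sketch assuming a hypothetical local file 'in.mp4' that is readable by ffmpeg. The first buffered iterator becomes the primary (it owns the ffmpeg pipe and fills the shared stream buffer); the delayed frame iterator is a secondary that reads the same frames from the buffer without opening a second pipe.

```python
import vipy.video

v = vipy.video.Video(filename='in.mp4')  # hypothetical local file

# zip() advances the primary iterator first, which caches each frame in the
# shared '__stream_buffer'; the secondary frame(delay=30) iterator then reads
# the same cached frames, lagged by 30 frames
for (im, im_delayed) in zip(v.stream(buffered=True), v.stream(buffered=True).frame(delay=30)):
    if im_delayed is not None:  # frame() yields None until its delay buffer has filled
        print(im, im_delayed)
```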
Methods
def batch(self, n)
-
Stream batches of length n such that each batch contains frames [0, n-1], [n, 2n-1], … The last batch will be ragged.
The primary use case for batch() is to provide a mechanism for parallel batch processing on a GPU.

```python
def myfunc(gen):
    for vb in gen:
        # process the batch vb (length n) in parallel by encoding on a GPU with batchsize=n
        for im in f_gpu(vb):
            yield im

for im_gpu in myfunc(vi.stream().batch(16)):
    print(im_gpu)
```

This will then yield the GPU batch-processed images im_gpu.
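As a rough, runnable sketch of this pattern (f_gpu here is a hypothetical stand-in for a real batched GPU model; `vipy.video.RandomScene` is the synthetic test video used elsewhere in these docs):

```python
import vipy.video

def f_gpu(vb):
    # Hypothetical stand-in for a batched GPU model: a real implementation
    # would encode all frames of the clip vb in a single forward pass
    return list(vb)

def myfunc(gen):
    for vb in gen:              # vb is a vipy.video.Video of up to n frames
        for im in f_gpu(vb):
            yield im            # yield processed frames one at a time

v = vipy.video.RandomScene()
for im_gpu in myfunc(v.stream().batch(16)):
    print(im_gpu)
```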
def clip(self, n, m=1, continuous=False, tracks=True, activities=True, delay=0, ragged=False)
-
Stream clips of length n such that the yielded video clip contains frame(0+delay) to frame(n+delay), and the next clip contains frame(m+delay) to frame(n+m+delay).
Usage examples:
```python
for vc in v.stream().clip(n=16, m=2):
    # yields video vc with frames [0,16] from v
    # then video vc with frames [2,18] from v
    # ... finally video with frames [len(v)-n-1, len(v)-1]
```
Introducing a delay so that the clips start at a temporal offset from v
```python
for vc in v.stream().clip(n=8, m=3, delay=1):
    # yields video vc with frames [1,9]
    # then video vc with frames [4,12]
    # ...
```
Args
n
- [int] the length of the clip in frames
m
- [int] the stride between clips in frames
delay
- [int] The temporal delay in frames for the clip, must be less than n and >= 0
continuous
- [bool] if true, then yield None for the sequential frames not aligned with a stride so that a clip is yielded on every frame
activities
- [bool] if false, then activities from the source video are not copied into the clip
tracks
- [bool] if false, then tracks from the source video are not copied into the clip
Returns
An iterator that yields `vipy.video.Video` objects, each of length n with startframe += m, starting at frame=delay, such that each video contains the tracks and activities (if requested) for this clip sourced from the shared stream video.
Note: This iterator runs in a thread to help speed up fetching of frames for GPU I/O bound operations.
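A short sketch contrasting the default and continuous modes, assuming the synthetic `vipy.video.RandomScene` test video:

```python
import vipy.video

v = vipy.video.RandomScene()

# Overlapping 16-frame clips with a stride of 4; tracks and activities from
# the shared source video are copied into each clip at yield time
for vc in v.stream().clip(n=16, m=4):
    print(vc)

# With continuous=True, None is yielded on off-stride frames so that
# something is yielded on every frame of the stream
for vc in v.stream().clip(n=16, m=4, continuous=True):
    if vc is not None:
        print(vc)
```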
def frame(self, delay=0)
-
Stream individual frames of video delayed by `delay` frames relative to the stream head. If delay=30, each yielded frame is the frame from 30 frames earlier in the stream.
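For example, a minimal sketch of a delayed frame stream on the synthetic test video:

```python
import vipy.video

v = vipy.video.RandomScene()

# Each yielded frame lags the stream head by 10 frames; None is yielded
# until the internal delay buffer has filled
for im in v.stream().frame(delay=10):
    if im is not None:
        print(im)
```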
def framerate(self)
-
def write(self, im, flush=False)
-
Write individual frames to write stream
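A minimal read-transform-write sketch using the stream context manager (the output path is a placeholder; frames must be three-channel RGB):

```python
import vipy.video

vi = vipy.video.RandomScene()              # synthetic source video
vo = vipy.video.Video(filename='out.mp4')  # placeholder output path

# __enter__ opens the ffmpeg write pipe (or defers it until the first write
# if the output shape is not yet known) and __exit__ closes it
with vo.stream(write=True, overwrite=True) as s:
    for im in vi.stream():
        s.write(im.rgb())
```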
class Video (filename=None, url=None, framerate=30.0, attributes=None, array=None, colorspace=None, startframe=None, endframe=None, startsec=None, endsec=None, frames=None, probeshape=False)
-
vipy.video.Video class
The vipy.video class provides a fluent, lazy interface for representing, transforming and visualizing videos. The following constructors are supported:
vid = vipy.video.Video(filename='/path/to/video.ext')
Valid video extensions are those that are supported by ffmpeg ['.avi', '.mp4', '.mov', '.wmv', '.mpg', '.mkv', '.webm'].
vid = vipy.video.Video(url='https://www.youtube.com/watch?v=MrIN959JuV8') vid = vipy.video.Video(url='http://path/to/video.ext', filename='/path/to/video.ext')
Youtube URLs are downloaded to a temporary filename, retrievable as vid.download().filename(). If the environment variable 'VIPY_CACHE' is defined, then videos are saved to this directory rather than the system temporary directory. If a filename is provided to the constructor, then that filename will be used instead of a temp or cached filename. URLs can be defined as an absolute URL to a video file, or to a site supported by 'youtube-dl' (https://ytdl-org.github.io/youtube-dl/supportedsites.html)
vid = vipy.video.Video(url='s3://BUCKET.s3.amazonaws.com/PATH/video.ext')
If you set the environment variables VIPY_AWS_ACCESS_KEY_ID and VIPY_AWS_SECRET_ACCESS_KEY, then this will download videos directly from S3 using boto3 and store in VIPY_CACHE. Note that the URL protocol should be 's3' and not 'http' to enable keyed downloads.
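A sketch of this keyed S3 flow (the bucket, path, and cache directory are placeholders; the environment variables are assumed to be exported before python starts):

```python
# Assumed to be exported in the shell beforehand:
#   export VIPY_AWS_ACCESS_KEY_ID=...
#   export VIPY_AWS_SECRET_ACCESS_KEY=...
#   export VIPY_CACHE=~/.vipy
import vipy.video

v = vipy.video.Video(url='s3://MY_BUCKET.s3.amazonaws.com/PATH/video.mp4')  # placeholder bucket/path
v.download()            # keyed boto3 download into VIPY_CACHE
print(v.filename())
```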
vid = vipy.video.Video(array=array, colorspace='rgb')
The input 'array' is an NxHxWx3 numpy array corresponding to an N-length list of HxWx3 uint8 numpy arrays, each of which is a single frame of pre-loaded video. Note that some video transformations are only available prior to load(), and the array() is assumed immutable after load().
frames = [im for im in vipy.video.RandomVideo()] vid = vipy.video.Video(frames=frames)
The input can be an RTSP video stream. Note that streaming is most efficiently performed using `Scene`. The URL must contain the 'rtsp://' url scheme.
You can experiment with this using the free Periscope H.264 RTSP App (https://apps.apple.com/us/app/periscope-hd-h-264-rtsp-cam/id1095600218)

```python
vipy.video.Scene(url='rtsp://127.0.0.1:8554/live.sdp').show()
for im in vipy.video.Scene(url='rtsp://127.0.0.1:8554/live.sdp').stream():
    print(im)
```
See also 'pip install heyvi'
Args
filename
- [str] The path to a video file.
url
- [str] The URL to a video file. If filename is not provided, then a random filename is assigned in VIPY_CACHE on download
framerate
- [float] The framerate of the video file. This is required. You can introspect this using ffprobe.
attributes
- [dict] A user supplied dictionary of metadata about this video.
colorspace
- [str] Must be in ['rgb', 'float']
array
- [numpy] An NxHxWxC numpy array for N frames each HxWxC shape
startframe
- [int] A start frame to clip the video
endframe
- [int] An end frame to clip the video
startsec
- [float] A start time in seconds to clip the video (this requires setting framerate)
endsec
- [float] An end time in seconds to clip the video (this requires setting framerate)
frames
- [list of vipy.image.Image] A list of frames in the video
probeshape
- [bool] If true, then probe the shape of the video from ffprobe to avoid an explicit preview later. This can speed up loading in some circumstances.
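For example, a small sketch of the probeshape speedup on a hypothetical local file:

```python
import vipy.video

# probeshape=True reads (height, width) from ffprobe metadata up front,
# avoiding the slower single-frame preview() that shape() would otherwise trigger
v = vipy.video.Video(filename='in.mp4', probeshape=True)  # hypothetical local file
print(v.shape())
```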
class Video(object): """ vipy.video.Video class The vipy.video class provides a fluent, lazy interface for representing, transforming and visualizing videos. The following constructors are supported: ```python vid = vipy.video.Video(filename='/path/to/video.ext') ``` Valid video extensions are those that are supported by ffmpeg ['.avi','.mp4','.mov','.wmv','.mpg', 'mkv', 'webm']. ```python vid = vipy.video.Video(url='https://www.youtube.com/watch?v=MrIN959JuV8') vid = vipy.video.Video(url='http://path/to/video.ext', filename='/path/to/video.ext') ``` Youtube URLs are downloaded to a temporary filename, retrievable as vid.download().filename(). If the environment variable 'VIPY_CACHE' is defined, then videos are saved to this directory rather than the system temporary directory. If a filename is provided to the constructor, then that filename will be used instead of a temp or cached filename. URLs can be defined as an absolute URL to a video file, or to a site supported by 'youtube-dl' (https://ytdl-org.github.io/youtube-dl/supportedsites.html) ```python vid = vipy.video.Video(url='s3://BUCKET.s3.amazonaws.com/PATH/video.ext') ``` If you set the environment variables VIPY_AWS_ACCESS_KEY_ID and VIPY_AWS_SECRET_ACCESS_KEY, then this will download videos directly from S3 using boto3 and store in VIPY_CACHE. Note that the URL protocol should be 's3' and not 'http' to enable keyed downloads. ```python vid = vipy.video.Video(array=array, colorspace='rgb') ``` The input 'array' is an NxHxWx3 numpy array corresponding to an N-length list of HxWx3 uint8 numpy array which is a single frame of pre-loaded video Note that some video transformations are only available prior to load(), and the array() is assumed immutable after load(). ```python frames = [im for im in vipy.video.RandomVideo()] vid = vipy.video.Video(frames=frames) ``` The input can be an RTSP video stream. Note that streaming is most efficiently performed using `vipy.video.Scene`. The URL must contain the 'rtsp://' url scheme. You can experiment with this using the free Periscope H.264 RTSP App (https://apps.apple.com/us/app/periscope-hd-h-264-rtsp-cam/id1095600218) ```python vipy.video.Scene(url='rtsp://127.0.0.1:8554/live.sdp').show() for im in vipy.video.Scene(url='rtsp://127.0.0.1:8554/live.sdp').stream(): print(im) ``` See also 'pip install heyvi' Args: filename: [str] The path to a video file. url: [str] The URL to a video file. If filename is not provided, then a random filename is assigned in VIPY_CACHE on download framerate: [float] The framerate of the video file. This is required. You can introspect this using ffprobe. attributes: [dict] A user supplied dictionary of metadata about this video. colorspace: [str] Must be in ['rgb', 'float'] array: [numpy] An NxHxWxC numpy array for N frames each HxWxC shape startframe: [int] A start frame to clip the video endframe: [int] An end frame to clip the video startsec: [float] A start time in seconds to clip the video (this requires setting framerate) endsec: [float] An end time in seconds to clip the video (this requires setting framerate) frames: [list of `vipy.image.Image`] A list of frames in the video probeshape: [bool] If true, then probe the shape of the video from ffprobe to avoid an explicit preview later. This can speed up loading in some circumstances. 
""" def __init__(self, filename=None, url=None, framerate=30.0, attributes=None, array=None, colorspace=None, startframe=None, endframe=None, startsec=None, endsec=None, frames=None, probeshape=False): self._url = None self._filename = None self._array = None self._colorspace = None self._ffmpeg = None self._framerate = None self.attributes = attributes if attributes is not None else {} assert isinstance(self.attributes, dict), "Attributes must be a python dictionary" assert filename is not None or url is not None or array is not None or frames is not None, 'Invalid constructor - Requires "filename", "url" or "array" or "frames"' if (url is None and isurl(filename)) or (url == filename and (isRTSPurl(url) or isRTMPurl(url))): (url, filename) = (filename, None) # correct for common typo vipy.video.Video('rtsp://....') assert not isurl(filename) # FFMPEG installed? if not has_ffmpeg: warnings.warn('"ffmpeg" executable not found on path, this is required for vipy.video - Install from http://ffmpeg.org/download.html') # Constructor clips startframe = startframe if startframe is not None else (0 if endframe is not None else startframe) assert (startsec is not None and endsec is not None) or (startsec is None and endsec is None), "Invalid input - (startsec,endsec) are both required" (self._startframe, self._endframe) = (None, None) # __repr__ only (self._startsec, self._endsec) = (None, None) # __repr__ only (legacy, no longer used) # Input filenames if url is not None: assert isurl(url), 'Invalid URL "%s" ' % url self._url = url if filename is not None: self._filename = os.path.normpath(os.path.expanduser(filename)) elif self._url is not None: if isS3url(self._url): self._filename = totempdir(self._url) # Preserve S3 Object ID elif isRTSPurl(self._url): # https://ffmpeg.org/ffmpeg-protocols.html#rtsp self._filename = self._url elif isRTMPurl(self._url): # https://ffmpeg.org/ffmpeg-protocols.html#rtmp self._filename = self._url elif isvideourl(self._url): self._filename = templike(self._url) elif isyoutubeurl(self._url): self._filename = os.path.join(tempdir(), '%s' % (self._url.split('?')[1].split('&')[0] if '?' 
in self._url else self._url.split('/')[-1])) else: self._filename = totempdir(self._url) if vipy.globals.cache() is not None and self._filename is not None and not self.isstreaming(): self._filename = os.path.join(remkdir(vipy.globals.cache()), filetail(self._filename)) # Initial video shape: useful to avoid preview() if not self.isstreaming(): self._ffmpeg = ffmpeg.input(self.filename()) elif isRTSPurl(url): self._ffmpeg = ffmpeg.input(self.filename(), rtsp_flags='prefer_tcp') elif isRTMPurl(url): rtmp = urlparse(url) # rtmp://HOST:PORT/PATH?listen self._ffmpeg = ffmpeg.input(urlunparse( (rtmp.scheme,rtmp.netloc,rtmp.path,None,None,None) ), listen='1') if 'listen' in rtmp.query else ffmpeg.input(self.filename()) if probeshape and (frames is None and array is None) and has_ffprobe and self.hasfilename(): self.shape(self.probeshape()) else: self._shape = None # preview() on shape() # Video filter chain if framerate is not None: if array is None and frames is None: self.framerate(framerate) self._framerate = framerate if startframe is not None: self.clip(startframe, endframe) if startsec is not None: # WARNING: if the user does not supply the correct framerate for the video, then this will be wrong since these are converted to frames self.clip(int(round(startsec/self.framerate())), int(round(endsec/self.framerate())) if endsec is not None else None) # Array input assert not (array is not None and frames is not None) if array is not None: self.array(array) self.colorspace(colorspace) elif frames is not None and (isinstance(frames, list) or isinstance(frames, tuple)) and all([isinstance(im, vipy.image.Image) for im in frames]): self.fromframes(frames) elif frames is not None and (isinstance(frames, list) or isinstance(frames, tuple)) and all([isinstance(im, str) and os.path.exists(im) for im in frames]): self.fromframes([vipy.image.Image(filename=f) for f in frames]) elif frames is not None and (isinstance(frames, str) and os.path.isdir(frames)): self.fromdirectory(frames) @classmethod def cast(cls, v): """Cast a conformal video object to a `vipy.video.Video` object. This is useful for downcasting superclasses. ```python vs = vipy.video.RandomScene() v = vipy.video.Video.cast(vs) ``` """ assert isinstance(v, vipy.video.Video), "Invalid input - must be derived from vipy.video.Video" v.__class__ = vipy.video.Video return v @classmethod def from_json(cls, s): """Import a json string as a `vipy.video.Video` object. This will perform a round trip from a video to json and back to a video object. This same operation is used for serialization of all vipy objects to JSON for storage. 
```python v = vipy.video.Video.from_json(vipy.video.RandomVideo().json()) ``` """ d = json.loads(s) if not isinstance(s, dict) else s d = {k.lstrip('_'):v for (k,v) in d.items()} # prettyjson (remove "_" prefix to attributes) v = cls(filename=d['filename'], url=d['url'], framerate=d['framerate'], array=np.array(d['array']) if d['array'] is not None else None, colorspace=d['colorspace'], attributes=d['attributes'], startframe=d['startframe'], endframe=d['endframe'], startsec=d['startsec'], endsec=d['endsec']) v._ffmpeg = v._from_ffmpeg_commandline(d['ffmpeg']) return v.filename(d['filename']) if d['filename'] is not None else v.nofilename() def __repr__(self): strlist = [] if self.isloaded(): strlist.append("height=%d, width=%d, frames=%d, color=%s" % (self.height(), self.width(), len(self), self.colorspace())) if self.filename() is not None: strlist.append('filename="%s"' % self.filename()) if self.hasurl(): strlist.append('url="%s"' % self.url()) if not self.isloaded() and self._startframe is not None and self._endframe is not None: strlist.append('clip=(%d,%d)' % (self._startframe, self._endframe)) if not self.isloaded() and self._startframe is not None and self._endframe is None: strlist.append('clip=(%d,)' % (self._startframe)) if self._framerate is not None: strlist.append('fps=%1.1f' % float(self._framerate)) return str('<vipy.video: %s>' % (', '.join(strlist))) def __len__(self): """Number of frames in the video if loaded, else zero. .. notes:: Do not automatically trigger a load, since this can interact in unexpected ways with other tools that depend on fast __len__() """ if not self.isloaded(): warnings.warn('Load() video to see number of frames - Returning zero') # should this just throw an exception? return len(self.array()) if self.isloaded() else 0 def __getitem__(self, k): """Alias for `vipy.video.Video.frame`""" return self.frame(k) def metadata(self, k=None): """Return a dictionary of metadata about this video. Args: k [str]: If provided, return just the specified key of the attributes dictionary, otherwise return the attributes dictionary Returns: The 'attributes' dictionary, or just the value for the provided key k if provided """ return self.attributes if k is None else self.attributes[k] def sanitize(self): """Remove all private keys from the attributes dictionary. The attributes dictionary is useful storage for arbitrary (key,value) pairs. However, this storage may contain sensitive information that should be scrubbed from the video before serialization. As a general rule, any key that is of the form '__keyname' prepended by two underscores is a private key. This is analogous to private or reserved attributes in the python lanugage. Users should reserve these keynames for those keys that should be sanitized and removed before any seerialization of this object. ```python assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False ``` """ if self._has_private_attribute(): self.attributes = {k:v for (k,v) in self.attributes.items() if not k.startswith('__')} return self def videoid(self, newid=None): """Return a unique video identifier for this video, as specified in the 'video_id' attribute, or by SHA1 hash of the `vipy.video.Video.filename` and `vipy.video.Video.url`. Args: newid: [str] If not None, then update the video_id as newid. Returns: The video ID if newid=None else self .. note:: - If the video filename changes (e.g. from transformation), and video_id is not set in self.attributes, then the video ID will change. 
- If a video does not have a filename or URL or a video ID in the attributes, then this will return None - To preserve a video ID independent of transformations, set self.setattribute('video_id', ${MY_ID}), or pass in newid """ if newid is not None: self.setattribute('video_id', newid) return self else: return self.attributes['video_id'] if 'video_id' in self.attributes else (hashlib.sha1(str(str(self.filename())+str(self.url())).encode("UTF-8")).hexdigest() if (self.filename() is not None or self.url() is not None) else None) def frame(self, k=0, img=None, t=None): """Return the kth frame as an `vipy.image Image` object""" k = int(self.framerate()*t) if t is not None else k assert isinstance(k, int) and k>=0, "Frame index must be non-negative integer" return Image(array=img if img is not None else (self._array[k] if self.isloaded() else self.preview(k).array()), colorspace=self.colorspace()) def __iter__(self): """Iterate over video, yielding read only frames. ```python for im in vipy.video.RandomScene(): print(im) ``` """ return self.stream().__iter__() def store(self): """Store the current video file as an attribute of this object. Useful for archiving an object to be fully self contained without any external references. ```python v == v.store().restore(v.filename()) ``` .. note:: -Remove this stored video using unstore() -Unpack this stored video and set up the video chains using restore() -This method is more efficient than load() followed by pkl(), as it stores the encoded video as a byte string. -Useful for creating a single self contained object for distributed processing. """ assert self.hasfilename(), "Video file not found. Try saveas() first to create a video file to store." with open(self.filename(), 'rb') as f: self.attributes['__video__'] = f.read() return self def unstore(self): """Delete the currently stored video from `vipy.video.Video.store""" return self.delattribute('__video__') def restore(self, filename): """Save the currently stored video as set using `vipy.video.Video.store` to filename, and set up filename""" assert self.hasattribute('__video__'), "Video not stored" with open(filename, 'wb') as f: f.write(self.attributes['__video__']) return self.filename(filename) @classmethod def concatenate(cls, videos, outfile, framerate=30, youtube_chapters=None): """Temporally concatenate a sequence of videos into a single video stored in outfile. ```python (v1, v2, v3) = (vipy.video.RandomVideo(128,128,32), vipy.video.RandomVideo(128,128,32), vipy.video.RandomVideo(128,128,32)) vc = vipy.video.Video.concatenate((v1, v2, v3), 'concatenated.mp4', youtube_chapters=lambda v: v.category()) ``` In this example, vc will point to concatenated.mp4 which will contain (v1,v2,v3) concatenated temporally . Args: videos: a single video or an iterable of videos of type `vipy.video.Video` or an iterable of video files outfile: the output filename to store the concatenation. youtube_chapters: [bool, callable]: If true, output a string that can be used to define the start and end times of chapters if this video is uploaded to youtube. The string output should be copied to the youtube video description in order to enable chapters on playback. This argument will default to the string representation ofo the video, but you may also pass a callable of the form: 'youtube_chapters=lambda v: str(v)' which will output the provided string for each video chapter. 
A useful lambda is 'youtube_chapters=lambda v: v.category()' framerate: [float]: The output frame rate of outfile Returns: A `vipy.video.Video` object with filename()=outfile, such that outfile contains the temporal concatenation of pixels in (self, videos). .. note:: - self will not be modified, this will return a new `vipy.video.Video` object. - All videos must be the same shape(). If the videos are different shapes, you must pad them to a common size equal to self.shape(). Try `vipy.video.Video.zeropadlike`. - The output video will be at the framerate of self.framerate(). - if you want to concatenate annotations, call `vipy.video.Scene.annotate` first on the videos to save the annotations into the pixels, then concatenate. """ assert len(tolist(videos))>0 and (all([isinstance(v, vipy.video.Video) for v in tolist(videos)]) or all([os.path.exists(f) and vipy.util.isvideofile(f) for f in tolist(videos)])) vi = tolist(videos) if all([isinstance(v, vipy.video.Video) for v in tolist(videos)]) else [cls(filename=f) for f in tolist(videos)] assert all([vij.shape() == vik.shape() for vij in vi for vik in vi]), "Video shapes must all the same, try padding" vo = cls(filename=outfile, framerate=vi[0].framerate()) with vo.stream(overwrite=True) as s: for v in vi: for im in v.clone().framerate(framerate).stream(): s.write(im) if youtube_chapters is not None: f = youtube_chapters if callable(youtube_chapters) else lambda v: str(v).replace('<','').replace('>','') # angle brackets not allowed print('[vipy.video.concatenate]: Copy the following into the video Description after uploading the videofile "%s" to YouTube to enable chapters on playback.\n' % outfile) print('\n'.join(['%s %s' % (vipy.util.seconds_to_MMSS_colon_notation(int(s)), str(f(v))) for (s,v) in zip(np.cumsum([0] + [v.duration() for v in vi][:-1]), vi)])); print('\n') if any([v.duration() < 10 for v in vi]): warnings.warn('YouTube chapters must be a minimum duration of 10 seconds') return vo def stream(self, write=False, overwrite=False, queuesize=512, bitrate=None, buffered=False, rebuffered=False, bufsize=256): """Iterator to yield groups of frames streaming from video. A video stream is a real time iterator to read or write from a video. Streams are useful to group together frames into clips that are operated on as a group. The following use cases are supported: ```python v = vipy.video.RandomScene() ``` Stream individual video frames lagged by 10 frames and 20 frames ```python for (im1, im2) in zip(v.stream().frame(n=-10), v.stream().frame(n=-20)): print(im1, im2) ``` Stream overlapping clips such that each clip is a video n=16 frames long and starts at frame i, and the next clip is n=16 frames long and starts at frame i=i+m ```python for vc in v.stream().clip(n=16, m=4): print(vc) ``` Stream non-overlapping batches of frames such that each clip is a video of length n and starts at frame i, and the next clip is length n and starts at frame i+n ```python for vb in v.stream().batch(n=16): print(vb) ``` Create a write stream to incrementally add frames to long video. 
```python vi = vipy.video.Video(filename='/path/to/output.mp4') vo = vipy.video.Video(filename='/path/to/input.mp4') with vo.stream(write=True) as s: for im in vi.stream(): s.write(im) # manipulate pixels of im, if desired ``` Create a 480p YouTube live stream from an RTSP camera at 5Hz ```python vo = vipy.video.Scene(url='rtmp://a.rtmp.youtube.com/live2/$SECRET_STREAM_KEY') vi = vipy.video.Scene(url='rtsp://URL').framerate(5) with vo.framerate(5).stream(write=True, bitrate='1000k') as s: for im in vi.framerate(5).resize(cols=854, rows=480): s.write(im) ``` Args: write: [bool] If true, create a write stream overwrite: [bool] If true, and the video output filename already exists, overwrite it bufsize: [int] The maximum queue size for the ffmpeg pipe thread in the primary iterator. The queue size is the maximum size of pre-fetched frames from the ffmpeg pip. This should be big enough that you are never waiting for queue fills bitrate: [str] The ffmpeg bitrate of the output encoder for writing, written like '2000k' bufsize: [int] The maximum size of the stream buffer in frames. The stream buffer length should be big enough so that all iterators can yield before deleting old frames Returns: A Stream object ..note:: Using this iterator may affect PDB debugging due to stdout/stdin redirection. Use ipdb instead. """ return Stream(self, queuesize=queuesize, write=write, overwrite=overwrite, bitrate=bitrate, buffered=buffered, rebuffered=rebuffered, bufsize=bufsize) # do not clone def clear(self): """no-op for `vipy.video.Video` object, used only for `vipy.video.Scene`""" return self def bytes(self): """Return a bytes representation of the video file""" assert self.hasfilename(), "Invalid filename" with open(self.filename(), 'rb') as f: data = io.BytesIO(f.read()) return str(data.read()).encode('UTF-8') def frames(self): """Alias for __iter__()""" return self.__iter__() def framelist(self): return list(self.frames()) def _update_ffmpeg_seek(self, timestamp_in_seconds=0, offset=0): if timestamp_in_seconds == 0 and offset == 0: return self nodes = ffmpeg.nodes.get_stream_spec_nodes(self._ffmpeg) sorted_nodes, outgoing_edge_maps = ffmpeg.dag.topo_sort(nodes) for n in sorted_nodes: if 'input' == n.__dict__['name']: if 'ss' not in n.__dict__['kwargs']: n.__dict__['kwargs']['ss'] = 0 if timestamp_in_seconds == 0: n.__dict__['kwargs']['ss'] = n.__dict__['kwargs']['ss'] + offset else: n.__dict__['kwargs']['ss'] = timestamp_in_seconds + offset return self raise ValueError('invalid ffmpeg argument "%s" -> "%s"' % ('ss', timestamp_in_seconds)) def _update_ffmpeg(self, argname, argval, node_name=None): """Update the ffmpeg filter chain to overwrite the (argname, argval) elements. Useful for fine-tuning a filter chain without rewwriring the whole thing. 
""" nodes = ffmpeg.nodes.get_stream_spec_nodes(self._ffmpeg) sorted_nodes, outgoing_edge_maps = ffmpeg.dag.topo_sort(nodes) for n in sorted_nodes: if argname in n.__dict__['kwargs'] or node_name == n.__dict__['name']: n.__dict__['kwargs'][argname] = argval return self raise ValueError('invalid ffmpeg argument "%s" -> "%s"' % (argname, argval)) def _ffmpeg_commandline(self, f=None): """Return the ffmpeg command line string that will be used to process the video""" cmd = f.compile() if f is not None else self._ffmpeg.output('dummyfile').compile() for (k,c) in enumerate(cmd): if c is None: cmd[k] = str(c) elif 'filter' in c: cmd[k+1] = '"%s"' % str(cmd[k+1]) elif 'map' in c: cmd[k+1] = '"%s"' % str(cmd[k+1]) return str(' ').join(cmd) def commandline(self): """Return the equivalent ffmpeg command line string that will be used to transcode the video. This is useful for introspecting the complex filter chain that will be used to process the video. You can try to run this command line yourself for debugging purposes, by replacing 'dummyfile' with an appropriately named output file. """ return self._ffmpeg_commandline() def _from_ffmpeg_commandline(self, cmd, strict=False): """Convert the ffmpeg command line string (e.g. from `vipy.video.Video.commandline`) to the corresponding ffmpeg-python filter chain and update self""" args = copy.copy(cmd).replace(str(self.filename()), 'FILENAME').split(' ') # filename may contain spaces assert args[0] == 'ffmpeg', "Invalid FFMEG commmand line '%s'" % cmd assert args[1] == '-i' or (args[3] == '-i' and (args[1] == '-ss' or args[1] == '-rtsp_flags' or args[1] == '-listen')), "Invalid FFMEG commmand line '%s'" % cmd assert args[-1] == 'dummyfile', "Invalid FFMEG commmand line '%s'" % cmd assert len(args) >= 4, "Invalid FFMEG commmand line '%s'" % cmd if args[1] == '-ss': timestamp_in_seconds = float(args[2]) timestamp_in_seconds = int(timestamp_in_seconds) if timestamp_in_seconds == 0 else timestamp_in_seconds # 0.0 -> 0 args = [args[0]] + args[3:] f = ffmpeg.input(args[2].replace('FILENAME', self.filename()), ss=timestamp_in_seconds) # restore filename, set offset time self._startframe = int(round(timestamp_in_seconds*self.framerate())) # necessary for clip() and __repr__ elif args[1] == '-rtsp_flags': rtsp_flag = str(args[2]) args = [args[0]] + args[3:] f = ffmpeg.input(args[2].replace('FILENAME', self.filename()), rtsp_flags=rtsp_flag) # restore filename, set rtsp flag elif args[1] == '-listen': listenval = str(args[2]) args = [args[0]] + args[3:] f = ffmpeg.input(args[2].replace('FILENAME', self.filename()), listen=listenval) # restore filename, set RTMP listener else: f = ffmpeg.input(args[2].replace('FILENAME', self.filename())) # restore filename if len(args) > 4: assert args[3] == '-filter_complex', "Invalid FFMEG commmand line '%s'" % cmd assert args[4][0] == '"' and args[4][-1] == '"', "Invalid FFMEG commmand line '%s'" % cmd filterargs = args[4][1:-1].split(';') for a in filterargs: assert a.count(']') == 2 and a.count('[') == 2 kwstr = a.split(']', maxsplit=1)[1].split('[', maxsplit=1)[0] if kwstr.count('=') == 0: f = f.filter(kwstr) else: (a, kw) = ([], {}) (filtername, kwstr) = kwstr.split('=', maxsplit=1) for s in kwstr.split(':'): if s.count('=') > 0: (k,v) = s.split('=') kw[k] = v else: a.append(s) if 'end' in kw: self._endframe = (self._startframe if self._startframe is not None else 0) + int(round(float(kw['end'])*self.framerate())) # for __repr__ if 'start' in kw: pass if 'start_frame' in kw or 'end_frame' in kw: f = f.setpts('PTS-STARTPTS') 
# reset timestamp to 0 before trim filter in seconds if 'end_frame' in kw: self._endframe = (self._startframe if self._startframe is not None else 0) + int(kw['end_frame']) # for __repr__ kw['end'] = int(kw['end_frame'])/self.framerate() # convert end_frame to end (legacy) del kw['end_frame'] # use only end and not end frame if 'start_frame' in kw: self._startframe = (self._startframe if self._startframe is not None else 0) + int(kw['start_frame']) # for __repr__ kw['start'] = int(kw['start_frame'])/self.framerate() # convert start_frame to start (legacy) del kw['start_frame'] # use only start and not start_frame f = f.filter(filtername, *a, **kw) if strict: assert self._ffmpeg_commandline(f.output('dummyfile')) == cmd, "FFMPEG command line '%s' != '%s'" % (self._ffmpeg_commandline(f.output('dummyfile')), cmd) return f def _isdirty(self): """Has the FFMPEG filter chain been modified from the default? If so, then ffplay() on the video file will be different from self.load().play()""" return '-filter_complex' in self._ffmpeg_commandline() def probeshape(self): """Return the (height, width) of underlying video file as determined from ffprobe .. warning:: this does not take into account any applied ffmpeg filters. The shape will be the (height, width) of the underlying video file. """ p = self.probe() assert len(p['streams']) > 0 return (p['streams'][0]['height'], p['streams'][0]['width']) def duration_in_seconds_of_videofile(self): """Return video duration of the source filename (NOT the filter chain) in seconds, requires ffprobe. Fetch once and cache. .. notes:: This is the duration of the source video and NOT the duration of the filter chain. If you load(), this may be different duration depending on clip() or framerate() directives. """ filehash = hashlib.md5(str(self.downloadif().filename()).encode()).hexdigest() if self.hasattribute('_duration_in_seconds_of_videofile') and self.attributes['__duration_in_seconds_of_videofile']['filehash'] == filehash: return self.attributes['__duration_in_seconds_of_videofile']['duration'] else: d = float(self.probe()['format']['duration']) self.attributes['__duration_in_seconds_of_videofile'] = {'duration':d, 'filehash':filehash} # for next time, private attribute return d def duration_in_frames_of_videofile(self): """Return video duration of the source video file (NOT the filter chain) in frames, requires ffprobe. .. notes:: This is the duration of the source video and NOT the duration of the filter chain. If you load(), this may be different duration depending on clip() or framerate() directives. """ return int(np.floor(self.duration_in_seconds_of_videofile()*self.framerate_of_videofile())) def duration(self, frames=None, seconds=None, minutes=None): """Return a video clipped with frame indexes between (0, frames) or (0,seconds*self.framerate()) or (0,minutes*60*self.framerate(). Return duration in seconds if no arguments are provided.""" if frames is None and seconds is None and minutes is None: return self.duration_in_seconds_of_videofile() if not self.isloaded() else (len(self) / self.framerate()) assert frames is not None or seconds is not None or minutes is not None frames = frames if frames is not None else ((int(seconds*self.framerate()) if seconds is not None else 0) + (int(minutes*60*self.framerate()) if minutes is not None else 0)) return self.clip(0, frames) def duration_in_frames(self): """Return the duration of the video filter chain in frames, equal to round(self.duration()*self.framerate()). 
Requires a probe() of the video to get duration""" return int(round(self.duration()*self.framerate())) def framerate_of_videofile(self): """Return video framerate in frames per second of the source video file (NOT the filter chain), requires ffprobe. """ p = self.probe() assert 'streams' in p and len(['streams']) > 0 fps = p['streams'][0]['avg_frame_rate'] return float(fps) if '/' not in fps else (float(fps.split('/')[0]) / float(fps.split('/')[1])) # fps='30/1' or fps='30.0' def resolution_of_videofile(self): """Return video resolution in (height, width) in pixels (NOT the filter chain), requires ffprobe. """ p = self.probe() assert 'streams' in p and len(['streams']) > 0 (H,W) = (p['streams'][0]['height'], p['streams'][0]['width']) # (height, width) in pixels return (W,H) if ('tags' in p['streams'][0] and 'rotate' in p['streams'][0]['tags'] and p['streams'][0]['tags']['rotate'] in ['90','270']) else (H,W) def probe(self, **kwargs): """Run ffprobe on the filename and return the result as a dictionary Args: Any keyword arguments supported by python-ffmpeg probe() - these are passed in as-is - for flags, use flag_name=None (e.g., show_frames=None) so that ffmpeg.probe() handles them correctly """ if not has_ffprobe: raise ValueError('"ffprobe" executable not found on path, this is optional for vipy.video - Install from http://ffmpeg.org/download.html') assert self.downloadif().hasfilename(), "Invalid video file '%s' for ffprobe" % self.filename() return ffmpeg.probe(self.filename(), **kwargs) def frame_meta(self, k=None): """Return the frame metadata of the underlying video file using ffprobe for all frames as a list of dicts, each list element corresponding to a frame. This is useful for extracting frame types (e.g. i-frames). Args: k [int]: Return only the frame metadata for frame index k (relative to framerate of source videofile, not filter chain) Returns: a list of metadata dicts (one per frame) or a single dict for the requested frame. .. notes:: - This will return a large amount of metadata for the entire source video (not the FFMPEG filter chain), use with caution. - To get frame metata for a filter chain use vipy.video.Video.savetemp().frame_meta(), which will save the video to a temporary file prior to extracting frame metadata """ d = self.probe(show_frames=None).get('frames') return d if k is None else d[k] def metaframe(self, k=None): """Alias for `vipy.video.Video.frame_meta`""" return self.frame_meta(k) def iframes(self): """Return a list of i-frame indexes (e.g. intra-frame, a video frame that is independent from other frames for decoding) in this video file. .. note:: - To return the i-frame indexes for the current filter chain use self.saveas().iframes() to save to a temporary file prior to i-frame index extraction. - To extract the i-frame itself, use [self.frame(k) for k in self.iframes()] """ return [k for (k,d) in enumerate(self.metaframe()) if d['pict_type'] == 'I'] def print(self, prefix='', verbose=True, sleep=None): """Print the representation of the video This is useful for debugging in long fluent chains. Sleep is useful for adding in a delay for distributed processing. Args: prefix: prepend a string prefix to the video __repr__ when printing. Useful for logging. verbose: Print out the video __repr__. Set verbose=False to just sleep sleep: Integer number of seconds to sleep[ before returning fluent [bool]: If true, return self else return None. 
This is useful for terminating long fluent chains in lambdas that return None Returns: The video object after sleeping """ if verbose: print(prefix+self.__repr__()) if sleep is not None: assert isinstance(sleep, int) and sleep > 0, "Sleep must be a non-negative integer number of seconds" time.sleep(sleep) return self def printif(self, b, prefix='', verbose=True, sleep=None): """Call `vipy.video.Video.print` if b=True. Useful for fluent chains to print periodically.""" assert isinstance(b, bool) return self.print(prefix=prefix, verbose=b, sleep=sleep) def __array__(self): """Called on np.array(self) for custom array container, (requires numpy >=1.16)""" return self.numpy() def dict(self): """Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding.""" return self.json(encode=False) def __json__(self): """Serialization method for json package""" return self.json(encode=True) def json(self, encode=True): """Return a json representation of the video. Args: encode: If true, return a JSON encoded string using json.dumps Returns: A JSON encoded string if encode=True, else returns a dictionary object .. note:: If the video is loaded, then the JSON will not include the pixels. Try using `vipy.video.Video.store` to serialize videos, or call `vipy.video.Video.flush` first. """ if self.isloaded(): warnings.warn("JSON serialization of loaded video is inefficient. Try store()/restore()/unstore() instead to serialize videos as standalone objects efficiently, or flush() any loaded videos prior to serialization to quiet this warning.") d = {'filename':self._filename, 'url':self._url, 'framerate':self._framerate, 'array':None, 'colorspace':self._colorspace, 'attributes':self.attributes, 'startframe':self._startframe, 'endframe':self._endframe, 'endsec':self._endsec, 'startsec':self._startsec, 'ffmpeg':self._ffmpeg_commandline()} return json.dumps(d) if encode else d def take(self, n): """Return n frames from the clip uniformly spaced as numpy array Args: n: Integer number of uniformly spaced frames to return Returns: A numpy array of shape (n,W,H) .. warning:: This assumes that the entire video is loaded into memory (e.g. call `vipy.video.Video.load`). Use with caution. """ assert self.isloaded(), "load() is required before take()" dt = int(np.round(len(self._array) / float(n))) # stride return self._array[::dt][0:n] def framerate(self, fps=None): """Change the input framerate for the video and update frame indexes for all annotations Args: fps: [Float] frames per second to process the underlying video Returns: If fps is None, return the current framerate, otherwise set the framerate to fps """ if fps is None: return self._framerate elif float(fps) == self._framerate: return self else: assert not self.isloaded(), "Filters can only be applied prior to load()" if 'fps=' in self._ffmpeg_commandline(): self._update_ffmpeg('fps', float(fps)) # replace fps filter, do not add to it else: self._ffmpeg = self._ffmpeg.filter('fps', fps=float(fps), round='up') # create fps filter first time # if '-ss' in self._ffmpeg_commandline(): # No change is needed here. The seek is in seconds and is independent of the framerate # if 'trim' in self._ffmpeg_commandline(): # No change is needed here. The trim is in units of seconds which is independent of the framerate self._framerate = float(fps) return self def colorspace(self, colorspace=None): """Return or set the colorspace as ['rgb', 'bgr', 'lum', 'float']. 
This will not change pixels, only the colorspace interpretation of pixels.""" if colorspace is None: return self._colorspace elif self.isloaded(): assert str(colorspace).lower() in ['rgb', 'bgr', 'lum', 'float'] if self.array().dtype == np.float32: assert str(colorspace).lower() in ['float'] elif self.array().dtype == np.uint8: assert str(colorspace).lower() in ['rgb', 'bgr', 'lum'] if str(colorspace).lower() in ['lum']: assert self.channels() == 1, "Luminance colorspace must be one channel uint8" elif str(colorspace).lower() in ['rgb', 'bgr']: assert self.channels() == 3, "RGB or BGR colorspace must be three channel uint8" else: raise ValueError('Invalid array() type "%s" - only np.float32 or np.uint8 allowed' % str(self.array().dtype)) self._colorspace = str(colorspace).lower() return self def nourl(self): """Remove the `vipy.video.Video.url` from the video""" (self._url, self._urluser, self._urlpassword, self._urlsha1) = (None, None, None, None) return self def url(self, url=None, username=None, password=None, sha1=None): """Video URL and URL download properties""" if url is not None: self._url = url # note that this does not change anything else, better to use the constructor for this if url is not None and (isRTSPurl(url) or isRTMPurl(url)): self.filename(self._url) if username is not None: self._urluser = username # basic authentication if password is not None: self._urlpassword = password # basic authentication if sha1 is not None: self._urlsha1 = sha1 # file integrity if url is None and username is None and password is None and sha1 is None: return self._url else: return self def isloaded(self): """Return True if the video has been loaded""" return self._array is not None def isloadable(self, flush=True): """Return True if the video can be loaded successfully. This is useful for filtering bad videos or filtering videos that cannot be loaded using your current FFMPEG version. Args: flush: [bool] If true, flush the video after it loads. This will clear the video pixel buffer Returns: True if load() can be called without FFMPEG exception. If flush=False, then self will contain the loaded video, which is helpful to avoid load() twice in some conditions .. warning:: This requires loading and flushing the video. This is an expensive operation when performed on many videos and may result in out of memory conditions with long videos. Use with caution! Try `vipy.video.Video.canload` to test if a single frame can be loaded as a less expensive alternative. """ if not self.isloaded(): try: self.load() # try to load the whole thing if flush: self.flush() return True except: return False else: return True def canload(self, frame=0): """Return True if the video can be previewed at frame=k successfully. This is useful for filtering bad videos or filtering videos that cannot be loaded using your current FFMPEG version. .. notes:: This will only try to preview a single frame. This will not check if the entire video is loadable. Use `vipy.video.Video.isloadable` in this case This will hang if calling canload on a streaming URL. 
""" if not self.isloaded(): try: self.preview(framenum=frame) # try to preview return True except: return False else: return True def iscolor(self): """Is the video a three channel color video as returned from `vipy.video.Video.channels`?""" return self.channels() == 3 def isstreaming(self): return isRTSPurl(self._filename) or isRTMPurl(self._filename) def isgrayscale(self): """Is the video a single channel as returned from `vipy.video.Video.channels`?""" return self.channels() == 1 def hasfilename(self): """Does the filename returned from `vipy.video.Video.filename` exist?""" return self._filename is not None and (os.path.exists(self._filename) or isRTSPurl(self._filename) or isRTMPurl(self._filename)) def isdownloaded(self): """Does the filename returned from `vipy.video.Video.filename` exist, meaning that the url has been downloaded to a local file?""" return self._filename is not None and os.path.exists(self._filename) def hasurl(self): """Is the url returned from `vipy.video.Video.url` a well formed url?""" return self._url is not None and isurl(self._url) def islive(self): return self.hasurl() and (isRTSPurl(self._url) or isRTMPurl(self._url)) def array(self, array=None, copy=False): """Set or return the video buffer as a numpy array. Args: array: [np.array] A numpy array of size NxHxWxC = (frames, height, width, channels) of type uint8 or float32. copy: [bool] If true, copy the buffer by value instaed of by reference. Copied buffers do not share pixels. Returns: if array=None, return a reference to the pixel buffer as a numpy array, otherwise return the video object. """ if array is None: return self._array elif isnumpy(array): assert array.dtype == np.float32 or array.dtype == np.uint8, "Invalid input - array() must be type uint8 or float32" assert array.ndim == 4, "Invalid input array() must be of shape NxHxWxC, for N frames, of size HxW with C channels" self._array = np.copy(array) if copy else array if copy: self._array.setflags(write=True) # mutable iterators, triggers copy self.colorspace(None) # must be set with colorspace() after array() before _convert() return self else: raise ValueError('Invalid input - array() must be numpy array') def fromarray(self, array): """Alias for self.array(..., copy=True), which forces the new array to be a copy""" return self.array(array, copy=True) def fromdirectory(self, indir, sortkey=None): """Create a video from a directory of frames stored as individual image filenames. Given a directory with files: framedir/image_0001.jpg framedir/image_0002.jpg ```python vipy.video.Video(frames='/path/to/framedir') ``` """ return self.fromframes([vipy.image.Image(filename=f) for f in sorted(vipy.util.imlist(indir), key=sortkey)]) def fromframes(self, framelist, copy=True): """Create a video from a list of frames""" assert all([isinstance(im, vipy.image.Image) for im in framelist]), "Invalid input" return self.array(np.stack([im.load().array() if im.load().array().ndim == 3 else np.expand_dims(im.load().array(), 2) for im in framelist]), copy=copy).colorspace(framelist[0].colorspace()) def tonumpy(self): """Alias for numpy()""" return self.numpy() def mutable(self): """Return a video object with a writeable mutable frame array. Video must be loaded, triggers copy of underlying numpy array if the buffer is not writeable. 
    def numpy(self):
        """Convert the video to a writeable numpy array, triggers a load() and copy() as needed.  Returns the numpy array."""
        self.load()
        self._array = np.copy(self._array) if not self._array.flags['WRITEABLE'] else self._array  # triggers copy
        self._array.setflags(write=True)  # mutable iterators, torch conversion
        return self._array

    def zeros(self):
        self._array = 0*self.load()._array
        return self

    def reload(self):
        return self.clone(flush=True).load()

    def nofilename(self):
        self._filename = None
        self._update_ffmpeg('filename', None)
        return self

    def filename(self, newfile=None, copy=False, symlink=False):
        """Update video filename with optional copy or symlink from the existing file (self.filename()) to the new file"""
        if newfile is None:
            return self._filename
        if not isurl(newfile):
            # Copy or symlink from the old filename to the new filename (if requested)
            newfile = os.path.normpath(os.path.expanduser(newfile))
            if copy:
                assert self.hasfilename(), "File not found for copy"
                remkdir(filepath(newfile))
                shutil.copyfile(self._filename, newfile)
            elif symlink:
                assert self.hasfilename(), "File not found for symlink"
                remkdir(filepath(newfile))
                if os.path.islink(newfile) and os.path.abspath(os.readlink(newfile)) == os.path.normpath(os.path.abspath(os.path.expanduser(self.filename()))):
                    pass  # already points to the same place, nothing to do
                else:
                    os.symlink(self._filename, newfile)

        # Update ffmpeg filter chain with new input node filename (this file may not exist yet)
        self._update_ffmpeg('filename', newfile)
        self._filename = newfile
        return self

    def abspath(self):
        """Change the path of the filename from a relative path to an absolute path (not relocatable)"""
        return self.filename(os.path.normpath(os.path.abspath(os.path.expanduser(self.filename()))))

    def relpath(self, parent=None, start=None):
        """Replace the filename with a relative path to parent (or current working directory if none).

        Usage:

        ```python
        v = vipy.video.Video(filename='/path/to/dataset/video/category/out.mp4')
        v.relpath(parent='/path/to/dataset')
        v.filename() == 'video/category/out.mp4'
        ```

        If the current working directory is /path/to/dataset, and v.load() is called, the filename will be loaded.

        Args:
            parent [str]: A parent path of the current filename to remove and be relative to.  If filename is '/path/to/video.mp4' then filename must start with parent, then parent will be removed from filename.
            start [str]: Return a relative filename starting from path start='/path/to/dir' that will create a relative path to this filename.  If start='/a/b/c' and filename='/a/b/d/e/f.ext' then return filename '../d/e/f.ext'

        Returns:
            This video object with the filename changed to be a relative path
        """
        assert parent is not None or start is not None
        if parent is not None:
            parent = parent if parent is not None else os.getcwd()
            assert parent in os.path.expanduser(self.filename()), "Parent path '%s' not found in abspath '%s'" % (parent, self.filename())
            self.filename(PurePath(os.path.expanduser(self.filename())).relative_to(parent))
        if start is not None:
            self.filename(os.path.join(os.path.relpath(os.path.expanduser(filepath(self.filename())), start), filetail(self.filename())))
        return self
    def rename(self, newname):
        """Move the underlying video file preserving the absolute path, such that if self.filename() == '/a/b/c.ext' and newname='d.ext', then self.filename() -> '/a/b/d.ext', and move the corresponding file"""
        newfile = os.path.join(filepath(self.filename()), newname)
        shutil.move(self.filename(), newfile)
        return self.filename(newfile)

    def filesize(self):
        """Return the size in bytes of the filename(), None if the filename() is invalid"""
        return os.path.getsize(self.filename()) if self.hasfilename() else None

    def downloadif(self, ignoreErrors=False, timeout=10, verbose=True, max_filesize='350m'):
        """Download URL to filename if the filename has not already been downloaded"""
        return self.download(ignoreErrors=ignoreErrors, timeout=timeout, verbose=verbose, max_filesize=max_filesize) if self.hasurl() and not self.isdownloaded() else self

    def download(self, ignoreErrors=False, timeout=10, verbose=True, max_filesize='350m'):
        """Download URL to filename provided by constructor, or to temp filename.

        Args:
            ignoreErrors: [bool] If true, show a warning and return the video object, otherwise throw an exception
            timeout: [int] An integer timeout in seconds for the download to connect
            verbose: [bool] If true, show more verbose console output
            max_filesize: [str] A string of the form 'NNNg' or 'NNNm' to limit the maximum size of a youtube download, e.g. '350m' for 350MB or '12g' for 12GB.

        Returns:
            This video object with the video downloaded to the filename()
        """
        if self._url is None and self._filename is not None:
            return self
        if self._url is None:
            raise ValueError('[vipy.video.download]: No URL to download')
        elif not isurl(str(self._url)):
            raise ValueError('[vipy.video.download]: Invalid URL "%s" ' % self._url)

        try:
            url_scheme = urllib.parse.urlparse(self._url)[0]
            if isyoutubeurl(self._url):
                f = self._filename if filefull(self._filename) is None else filefull(self._filename)
                vipy.videosearch.download(self._url, f, writeurlfile=False, skip=ignoreErrors, verbose=verbose, max_filesize=max_filesize)
                for ext in ['mkv', 'mp4', 'webm']:
                    f = '%s.%s' % (self.filename(), ext)
                    if os.path.exists(f):
                        self.filename(f)  # change the filename to match the youtube extension
                        break
                if not self.hasfilename():
                    raise ValueError('Downloaded file not found "%s.*"' % self.filename())

            elif url_scheme in ['http', 'https'] and (isvideourl(self._url) or iswebp(self._url)):
                vipy.downloader.download(self._url, self._filename, verbose=verbose, timeout=timeout, sha1=None, username=None, password=None)

            elif url_scheme == 'file':
                shutil.copyfile(self._url, self._filename)

            elif url_scheme == 's3':
                if self.filename() is None:
                    self.filename(totempdir(self._url))
                    if vipy.globals.cache() is not None:
                        self.filename(os.path.join(remkdir(vipy.globals.cache()), filetail(self._url)))
                vipy.downloader.s3(self.url(), self.filename(), verbose=verbose)

            elif url_scheme == 'scp':
                if self.filename() is None:
                    self.filename(templike(self._url))
                    if vipy.globals.cache() is not None:
                        self.filename(os.path.join(remkdir(vipy.globals.cache()), filetail(self._url)))
                vipy.downloader.scp(self._url, self.filename(), verbose=verbose)

            elif not isvideourl(self._url) and vipy.videosearch.is_downloadable_url(self._url):
                vipy.videosearch.download(self._url, filefull(self._filename), writeurlfile=False, skip=ignoreErrors, verbose=verbose, max_filesize=max_filesize)
                for ext in ['mkv', 'mp4', 'webm']:
                    f = '%s.%s' % (self.filename(), ext)
                    if os.path.exists(f):
                        self.filename(f)
                        break
                if not self.hasfilename():
                    raise ValueError('Downloaded file not found "%s.*"' % self.filename())

            elif url_scheme == 'rtsp':
                # https://ffmpeg.org/ffmpeg-protocols.html#rtsp
                pass

            else:
                raise NotImplementedError('Invalid URL scheme "%s" for URL "%s"' % (url_scheme, self._url))

        except (httplib.BadStatusLine, urllib.error.URLError, urllib.error.HTTPError):
            if ignoreErrors:
                warnings.warn('[vipy.video][WARNING]: download failed - Ignoring Video')
                self._array = None
            else:
                raise
        except IOError:
            if ignoreErrors:
                warnings.warn('[vipy.video][WARNING]: IO error - Invalid video file, url or invalid write permissions "%s" - Ignoring video' % self.filename())
                self._array = None
            else:
                raise
        except KeyboardInterrupt:
            raise
        except Exception:
            if ignoreErrors:
                warnings.warn('[vipy.video][WARNING]: load error for video "%s"' % self.filename())
            else:
                raise
        return self

    def fetch(self, ignoreErrors=False):
        """Download only if hasfilename() is not found"""
        return self.download(ignoreErrors=ignoreErrors) if not self.hasfilename() else self
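    # Sketch: the fetch-then-cache pattern for URL-backed videos (illustrative URL):
    #   v = vipy.video.Video(url='https://example.com/video.mp4')
    #   v = v.downloadif()       # no-op if the file was already downloaded
    #   assert v.isdownloaded()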
    def shape(self, shape=None, probe=False):
        """Return (height, width) of the frames, requires loading a preview frame from the video if the video is not already loaded, or providing the shape=(height,width) by the user"""
        if probe:
            # Set the shape of the video from the filename by ffprobe, this should be deprecated
            return self.shape(self.probeshape(), probe=False)
        elif shape is not None:
            # Set the shape of the video using the shape provided by the user (e.g. sometimes the user knows what this will be)
            assert isinstance(shape, tuple), "shape=(height, width) tuple"
            self._shape = shape
            self._channels = self.channels()
            #self._previewhash = hashlib.md5(str(self._ffmpeg_commandline()).encode()).hexdigest()
            return self
        elif not self.isloaded():
            # Preview a frame from the ffmpeg filter chain (more expensive)
            if self._shape is None or len(self._shape) == 0:  # dirty filter chain
                im = self.preview()  # ffmpeg chain changed, load a single frame of video, triggers fetch
                self._shape = (im.height(), im.width())  # cache the shape
                self._channels = im.channels()
                #self._previewhash = previewhash
            return self._shape
        else:
            # Frames already loaded - get shape from numpy array
            return (self._array.shape[1], self._array.shape[2])

    def channelshape(self):
        """Return a tuple (channels, height, width) for the video"""
        return (self.channels(), self.height(), self.width())

    def issquare(self):
        """Return true if the video has square dimensions (height == width), else false"""
        s = self.shape()
        return s[0] == s[1]

    def channels(self):
        """Return integer number of color channels"""
        if not self.isloaded():
            self._channels = 3  # always color video
            #previewhash = hashlib.md5(str(self._ffmpeg_commandline()).encode()).hexdigest()
            #if not hasattr(self, '_previewhash') or previewhash != self._previewhash:
            #    im = self.preview()  # ffmpeg chain changed, load a single frame of video
            #    self._shape = (im.height(), im.width())  # cache the shape
            #    self._channels = im.channels()  # cache
            #    self._previewhash = previewhash
            return self._channels  # cached
        else:
            return 1 if self.load().array().ndim == 3 else self.load().array().shape[3]

    def width(self):
        """Width (cols) in pixels of the video for the current filter chain"""
        return self.shape()[1]

    def height(self):
        """Height (rows) in pixels of the video for the current filter chain"""
        return self.shape()[0]

    def aspect_ratio(self):
        """The width/height of the video expressed as a fraction"""
        return self.width() / self.height()
    def preview(self, framenum=0):
        """Return the selected frame of the filtered video as a vipy.image.Image object.  This is useful for previewing the frame shape of a complex filter chain or the frame contents at a particular location without loading the whole video"""
        if self.isloaded():
            return self[framenum]
        elif self.hasurl() and not self.hasfilename():
            self.download(verbose=True)
        if not self.hasfilename():
            raise ValueError('Video file not found')
        if iswebp(self.filename()) or isgif(self.filename()):
            return self.load().frame(framenum)

        # Convert frame to mjpeg and pipe to stdout, used to get dimensions of video
        # - The MJPEG encoder will generally output lower quality than H.264 encoded frames
        # - This means that frame indexing from preview() will generate slightly different images than streaming raw
        # - Beware running convnets, as the pixels will be slightly different (~4 grey levels in uint8) ...
        try:
            # FFMPEG frame indexing is inefficient for large framenum.  Need to add "-ss sec.msec" flag before input
            # - the "ss" option must be provided before the input filename, and is supported by ffmpeg-python as ".input(in_filename, ss=time)"
            # - Seek to the frame before the desired frame in order to pipe the next (desired) frame
            timestamp_in_seconds = max(0.0, (framenum-1)/float(self.framerate()))
            f_prepipe = self.clone(shallow=True)._update_ffmpeg_seek(offset=timestamp_in_seconds)._ffmpeg.filter('select', 'gte(n,{})'.format(0))
            f = f_prepipe.output('pipe:', vframes=1, format='image2', vcodec='mjpeg')\
                         .global_args('-cpuflags', '0', '-loglevel', 'debug' if vipy.globals.isdebug() else 'error')
            (out, err) = f.run(capture_stdout=True, capture_stderr=True)
        except Exception as e:
            raise ValueError('[vipy.video.load]: Video preview failed with error "%s"\n - Video: "%s"\n - FFMPEG command: \'sh> %s\'\n - Try manually running this ffmpeg command to see errors.  This error usually means that the video is corrupted.' % (str(e), str(self), str(self._ffmpeg_commandline(f_prepipe.output('preview.jpg', vframes=1)))))

        # [EXCEPTION]: UnidentifiedImageError: cannot identify image file, usually means that FFMPEG piped a zero length image
        try:
            return Image(array=np.array(PIL.Image.open(BytesIO(out))))
        except Exception as e:
            print('[vipy.video.Video.preview][ERROR]: %s' % str(e))
            print(' - FFMPEG attempted to extract a single frame from the following video and failed:\n %s' % str(self))
            print(' - This may occur after calling clip() with too short a duration, try increasing the clip to be > 1 sec')
            print(' - This may occur after calling clip() with a startframe or endframe outside the duration of the video')
            print(' - This may occur if requesting a frame number greater than the length of the video.  At this point, we do not know the video length, and cannot fail gracefully')
            print(' - This may occur when the framerate of the video from ffprobe (tbr) does not match that passed to the fps filter, resulting in a zero length image preview piped to stdout')
            print(' - This may occur if the filter chain fails for some unknown reason on this video.  Try running this ffmpeg command manually and inspect the FFMPEG console output:\n sh> %s' % str(self._ffmpeg_commandline(f_prepipe.output('preview.jpg', vframes=1))))
            raise
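    # Sketch: inspecting the output of a filter chain without a full load (illustrative):
    #   im = v.preview(framenum=0)        # vipy.image.Image for frame 0 after all filters
    #   (H, W) = (im.height(), im.width())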
Need to add "-ss sec.msec" flag before input # - the "ss" option must be provided before the input filename, and is supported by ffmpeg-python as ".input(in_filename, ss=time)" # - Seek to the frame before the desired frame in order to pipe the next (desired) frame timestamp_in_seconds = max(0.0, (framenum-1)/float(self.framerate())) f_prepipe = self.clone(shallow=True)._update_ffmpeg_seek(offset=timestamp_in_seconds)._ffmpeg.filter('select', 'gte(n,{})'.format(0)) f = f_prepipe.output('pipe:', vframes=1, format='image2', vcodec='mjpeg')\ .global_args('-cpuflags', '0', '-loglevel', 'debug' if vipy.globals.isdebug() else 'error') (out, err) = f.run(capture_stdout=True, capture_stderr=True) except Exception as e: raise ValueError('[vipy.video.load]: Video preview failed with error "%s"\n - Video: "%s"\n - FFMPEG command: \'sh> %s\'\n - Try manually running this ffmpeg command to see errors. This error usually means that the video is corrupted.' % (str(e), str(self), str(self._ffmpeg_commandline(f_prepipe.output('preview.jpg', vframes=1))))) # [EXCEPTION]: UnidentifiedImageError: cannot identify image file, means usually that FFMPEG piped a zero length image try: return Image(array=np.array(PIL.Image.open(BytesIO(out)))) except Exception as e: print('[vipy.video.Video.preview][ERROR]: %s' % str(e)) print(' - FFMPEG attempted to extract a single frame from the following video and failed:\n %s' % str(self)) print(' - This may occur after calling clip() with too short a duration, try increasing the clip to be > 1 sec') print(' - This may occur after calling clip() with a startframe or endframe outside the duration of the video') print(' - This may occur if requesting a frame number greater than the length of the video. At this point, we do not know the video length, and cannot fail gracefully') print(' - This may occur when the framerate of the video from ffprobe (tbr) does not match that passed to fps filter, resulting in a zero length image preview piped to stdout') print(' - This may occur if the filter chain fails for some unknown reason on this video. Try running this ffmpeg command manually and inspect the FFMPEG console output:\n sh> %s' % str(self._ffmpeg_commandline(f_prepipe.output('preview.jpg', vframes=1)))) raise def thumbnail(self, outfile=None, frame=0): """Return annotated frame=k of video, save annotation visualization to provided outfile. This is functionally equivalent to `vipy.video.Video.frame` with an additional outfile argument to easily save an annotated thumbnail image. Args: outfile: [str] an optional outfile to save the annotated frame frame: [int >= 0] The frame to output the thumbnail Returns: A `vipy.image.Image` object for frame k. """ im = self.frame(frame, img=self.preview(frame).array()) return im.savefig(outfile) if outfile is not None else im def load(self, verbose=False, ignoreErrors=False, shape=None): """Load a video using ffmpeg, applying the requested filter chain. Args: verbose: [bool] if True. then ffmpeg console output will be displayed. ignoreErrors: [bool] if True, then all load errors are warned and skipped. Be sure to call isloaded() to confirm loading was successful. shape: [tuple (height, width, channels)] If provided, use this shape for reading and reshaping the byte stream from ffmpeg. This is useful for efficient loading in some scenarios. Knowing the final output shape can speed up loads by avoiding a preview() of the filter chain to get the frame size Returns: this video object, with the pixels loaded in self.array() .. 
    def speed(self, s):
        """Change the speed by a multiplier s.  If s=1, this will be the same speed, s=0.5 for half-speed (slower playback), s=2 for double-speed (faster playback)"""
        assert s > 0, "Invalid input"
        self._ffmpeg = self._ffmpeg.setpts('%1.3f*PTS' % float(1.0/float(s)))
        return self

    def clip(self, startframe, endframe=None):
        """Load a video clip between start and end frames"""
        assert (endframe is None or startframe <= endframe) and startframe >= 0, "Invalid start and end frames (%s, %s)" % (str(startframe), str(endframe))
        if not self.isloaded():
            timestamp_in_seconds = ((self._startframe if self._startframe is not None else 0)+startframe)/float(self.framerate())
            self._update_ffmpeg_seek(timestamp_in_seconds)
            if endframe is not None:
                self._ffmpeg = self._ffmpeg.setpts('PTS-STARTPTS')  # reset timestamp to 0 before trim filter
                self._ffmpeg = self._ffmpeg.trim(start=0, end=(endframe-startframe)/self.framerate())  # must be in seconds to allow for framerate conversion
            self._ffmpeg = self._ffmpeg.setpts('PTS-STARTPTS')  # reset timestamp to 0 after trim filter
            self._startframe = startframe if self._startframe is None else self._startframe + startframe  # for __repr__ only
            self._endframe = (self._startframe + (endframe-startframe)) if endframe is not None else endframe  # for __repr__ only
        else:
            endframe = endframe if endframe is not None else len(self._array)
            self._array = self._array[startframe:endframe]
            (self._startframe, self._endframe) = (0, endframe-startframe)
        return self
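    # Sketch: clip() composes with framerate conversion, since the trim filter operates
    # in seconds (illustrative frame indexes):
    #   v = v.framerate(5).clip(10, 60)   # frames 10-60 at the resampled 5 Hz rate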
    def cliprange(self):
        """Return the planned clip (startframe, endframe) range.

        This is useful for introspection of the planned clip() before load(), such as for data augmentation purposes without triggering a load.

        Returns:
            (startframe, endframe) of the video() such that after load(), the pixel buffer will contain frame=0 equivalent to startframe in the source video, and frame=endframe-startframe-1 equivalent to endframe in the source video.
            (0, None) if the video does not have a clip() (e.g. clip() was never called, the filter chain does not include a 'trim')

        .. notes:: The endframe can be retrieved (inefficiently) using:

        ```python
        int(round(self.duration_in_frames_of_videofile() * (self.framerate() / self.framerate_of_videofile())))
        ```
        """
        return (self._startframe if self._startframe is not None else 0, self._endframe)

    #def cliptime(self, startsec, endsec):
    #    """Load a video clip between start seconds and end seconds, should be initialized by constructor, which will work but will not set __repr__ correctly"""
    #    assert startsec <= endsec and startsec >= 0, "Invalid start and end seconds (%s, %s)" % (str(startsec), str(endsec))
    #    assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
    #    self._ffmpeg = self._ffmpeg.trim(start=startsec, end=endsec)\
    #                               .setpts('PTS-STARTPTS')  # reset timestamp to 0 after trim filter
    #    self._startsec = startsec if self._startsec is None else self._startsec + startsec  # for __repr__ only
    #    self._endsec = endsec if self._endsec is None else self._startsec + (endsec-startsec)  # for __repr__ only
    #    return self

    def rot90cw(self):
        """Rotate the video 90 degrees clockwise, can only be applied prior to load()"""
        assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
        self.shape(shape=(self.width(), self.height()))  # transposed
        self._ffmpeg = self._ffmpeg.filter('transpose', 1)
        return self

    def rot90ccw(self):
        """Rotate the video 90 degrees counter-clockwise, can only be applied prior to load()"""
        assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
        self.shape(shape=(self.width(), self.height()))  # transposed
        self._ffmpeg = self._ffmpeg.filter('transpose', 2)
        return self

    def fliplr(self):
        """Mirror the video left/right by flipping horizontally"""
        if not self.isloaded():
            self._ffmpeg = self._ffmpeg.filter('hflip')
        else:
            self.array(np.stack([np.fliplr(f) for f in self._array]), copy=False)
        return self

    def flipud(self):
        """Mirror the video up/down by flipping vertically, can only be applied prior to load()"""
        assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
        self._ffmpeg = self._ffmpeg.filter('vflip')
        return self

    def rescale(self, s):
        """Rescale the video by factor s, such that the new dimensions are (s*H, s*W), can only be applied prior to load()"""
        if s == 1:
            return self
        assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
        self.shape(shape=(int(np.round(self.height()*float(np.ceil(s*1e6)/1e6))), int(np.round(self.width()*float(np.ceil(s*1e6)/1e6)))))  # update the known shape
        self._ffmpeg = self._ffmpeg.filter('scale', 'iw*%1.6f' % float(np.ceil(s*1e6)/1e6), 'ih*%1.6f' % float(np.ceil(s*1e6)/1e6))  # ceil last significant digit to avoid off by one
        return self
    def resize(self, rows=None, cols=None, width=None, height=None):
        """Resize the video to be (rows=height, cols=width)"""
        assert not (rows is not None and height is not None)
        assert not (cols is not None and width is not None)
        rows = rows if rows is not None else height
        cols = cols if cols is not None else width
        newshape = (rows if rows is not None else int(np.round(self.height()*(cols/self.width()))),
                    cols if cols is not None else int(np.round(self.width()*(rows/self.height()))))
        if (rows is None and cols is None):
            return self  # only if strictly necessary
        if not self.isloaded():
            # Apply the scale filter:
            # - Note that multiple calls to resize will process through the filter chain one by one in the order of resizing calls
            # - This can introduce resizing artifacts if the resize is a downsample followed by an upsample
            # - One common use case is to downsample a video for tracking, then apply the tracks to the original video
            # - One approach: downsample the video, track at low resolution, upsample tracks back to the original resolution, then flush the filter chain to load the video at original resolution with no rescaling
            # >>> vc = tracker(v.clone().mindim(128)).mindim(v.mindim()).clone(flushfilter=True)
            self._ffmpeg = self._ffmpeg.filter('scale', cols if cols is not None else -1, rows if rows is not None else -1)
        else:
            # Do not use self.__iter__() which triggers copy for mutable arrays
            #self.array(np.stack([Image(array=self._array[k]).resize(rows=rows, cols=cols).array() for k in range(len(self))]), copy=False)

            # Faster: RGB->RGBX to allow for PIL.Image.fromarray() without tobytes() copy, padding faster than np.concatenate()
            #self.array(np.stack([PIL.Image.fromarray(x, mode='RGBX').resize( (cols, rows), resample=PIL.Image.BILINEAR) for x in np.pad(self._array, ((0,0),(0,0),(0,0),(0,1)))])[:,:,:,:-1], copy=False)  # RGB->RGBX->RGB

            # Fastest: padding introduces more overhead than just accepting tobytes(), image size dependent?
            self.array(np.stack([PIL.Image.fromarray(x).resize( (newshape[1], newshape[0]), resample=PIL.Image.BILINEAR) for x in np.ascontiguousarray(self._array)]), copy=False)
        self.shape(shape=newshape)  # manually set newshape
        return self

    def mindim(self, dim=None):
        """Resize the video so that the minimum of (width,height)=dim, preserving aspect ratio"""
        (H,W) = self.shape()  # yuck, need to get image dimensions before filter
        return min(self.shape()) if dim is None else (self if min(H,W) == dim else (self.resize(cols=dim) if W<H else self.resize(rows=dim)))

    def maxdim(self, dim=None):
        """Resize the video so that the maximum of (width,height)=dim, preserving aspect ratio"""
        assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
        (H,W) = self.shape()  # yuck, need to get image dimensions before filter
        return max(H,W) if dim is None else (self.resize(cols=dim) if W>H else self.resize(rows=dim))
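    # Sketch: the track-at-low-resolution workflow described in the resize() comment
    # above ('tracker' is a hypothetical function returning a tracked clone):
    #   vc = tracker(v.clone().mindim(128))                  # track on a downsampled clone
    #   vc = vc.mindim(v.mindim()).clone(flushfilter=True)   # upsample tracks, reload at original resolution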
    def randomcrop(self, shape, withbox=False):
        """Crop the video to shape=(H,W) with random position such that the crop contains only valid pixels, and optionally return the box"""
        assert shape[0] <= self.height() and shape[1] <= self.width()  # triggers preview()
        (xmin, ymin) = (np.random.randint(self.height()-shape[0]), np.random.randint(self.width()-shape[1]))
        bb = vipy.geometry.BoundingBox(xmin=int(xmin), ymin=int(ymin), width=int(shape[1]), height=int(shape[0]))  # may be outside frame
        self.crop(bb, zeropad=True)
        return self if not withbox else (self, bb)

    def centercrop(self, shape, withbox=False):
        """Crop the video to shape=(H,W) preserving the integer centroid position, and optionally return the box"""
        assert shape[0] <= self.height() and shape[1] <= self.width()  # triggers preview()
        bb = vipy.geometry.BoundingBox(xcentroid=float(self.width()/2.0), ycentroid=float(self.height()/2.0), width=float(shape[1]), height=float(shape[0])).int()  # may be outside frame
        self.crop(bb, zeropad=True)
        return self if not withbox else (self, bb)

    def centersquare(self):
        """Crop video of size (NxN) in the center, such that N=min(width,height), keeping the video centroid constant"""
        return self.centercrop( (min(self.height(), self.width()), min(self.height(), self.width())))

    def cropeven(self):
        """Crop the video to the largest even (width,height) less than or equal to current (width,height).  This is useful for some codecs or filters which require even shape."""
        return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(vipy.math.even(self.width())), height=int(vipy.math.even(self.height()))))
    def maxsquare(self):
        """Pad the video to be square, preserving the upper left corner of the video"""
        # This ffmpeg filter can throw the error:  "Padded dimensions cannot be smaller than input dimensions." since the preview is off by one.  Add one here to make sure.
        # FIXME: not sure where in some filter chains this off-by-one error is being introduced, but probably does not matter since it does not affect any annotations
        # and since the max square always preserves the scale and the upper left corner of the source video.
        # FIXME: this may trigger an inefficient resizing operation during load()
        if not self.issquare():
            d = max(self.shape())
            self._ffmpeg = self._ffmpeg.filter('pad', d+1, d+1, 0, 0)
            self.shape(shape=(d+1, d+1))
            return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(d), height=int(d)))
        else:
            return self

    def minsquare(self):
        """Return a square crop of the video, preserving the upper left corner of the video"""
        d = min(self.shape())
        self.shape(shape=(d, d))
        return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(d), height=int(d)))

    def maxmatte(self):
        """Return a square video with dimensions (self.maxdim(), self.maxdim()) with zeropadded black bars or mattes above or below the video, forming a letterboxed video."""
        return self.zeropad(max(1, int((max(self.shape()) - self.width())/2)), max(int((max(self.shape()) - self.height())/2), 1)).maxsquare()

    def zeropad(self, padwidth, padheight):
        """Zero pad the video with padwidth columns before and after, and padheight rows before and after

        .. notes:: Older FFMPEG implementations can throw the error "Input area #:#:#:# not within the padded area #:#:#:# or zero-sized"; this is often caused by odd sized padding.  Recommend calling self.cropeven().zeropad(...) to avoid this
        """
        assert isinstance(padwidth, int) and isinstance(padheight, int)
        if not self.isloaded():
            self.shape(shape=(self.height()+2*padheight, self.width()+2*padwidth))  # manually set shape to avoid preview
            self._ffmpeg = self._ffmpeg.filter('pad', 'iw+%d' % (2*padwidth), 'ih+%d' % (2*padheight), '%d'%padwidth, '%d'%padheight)
        elif padwidth > 0 or padheight > 0:
            self.array( np.pad(self.array(), ((0,0), (padheight,padheight), (padwidth,padwidth), (0,0)), mode='constant'), copy=False)  # this is very expensive, since np.pad() must copy (once in np.pad >=1.17)
        return self

    def pad(self, padwidth=0, padheight=0):
        """Alias for zeropad"""
        return self.zeropad(padwidth=padwidth, padheight=padheight)

    def zeropadlike(self, width, height):
        """Zero pad the video balancing the border so that the resulting video size is (width, height)."""
        assert width >= self.width() and height >= self.height(), "Invalid input - final (width=%d, height=%d) must be greater than current video size (width=%d, height=%d)" % (width, height, self.width(), self.height())
        assert int(np.floor((width - self.width())/2)) == int(np.ceil((width - self.width())/2)), "Zero pad must be symmetric, this is often due to odd zeropadding which ffmpeg doesn't like.  Try changing the width +/- 1 pixel"
        assert int(np.floor((height - self.height())/2)) == int(np.ceil((height - self.height())/2)), "Zero pad must be symmetric, this is often due to odd zeropadding which ffmpeg doesn't like.  Try changing the height +/- 1 pixel"
        return self.zeropad(int(np.floor((width - self.width())/2)), int(np.floor((height - self.height())/2)))
    def crop(self, bbi, zeropad=True):
        """Spatially crop the video using the supplied vipy.geometry.BoundingBox, can only be applied prior to load().

        .. note:: Crop is performed in place overwriting pixels of self.array().  Clone() before crop() if array() must be preserved.
        """
        assert isinstance(bbi, vipy.geometry.BoundingBox), "Invalid input"
        bbc = bbi.clone().imclipshape(self.width(), self.height()).int()  # clipped box to image rectangle
        bb = bbi.int() if zeropad else bbc  # use clipped box if not zeropad
        if bb.isdegenerate():
            return None
        elif not self.isloaded():
            if zeropad and bb != bbc:
                # Crop outside the image rectangle will segfault ffmpeg, pad video first (if zeropad=False, then rangecheck will not occur!)
                self.zeropad(int(np.ceil(bb.width()-bbc.width())), int(np.ceil(bb.height()-bbc.height())))  # cannot be called in derived classes
                bb = bb.offset(int(np.ceil(bb.width()-bbc.width())), int(np.ceil(bb.height()-bbc.height())))  # Shift boundingbox by padding (integer coordinates)
            self._ffmpeg = self._ffmpeg.filter('crop', '%d' % bb.width(), '%d' % bb.height(), '%d' % bb.xmin(), '%d' % bb.ymin(), keep_aspect=0)  # keep_aspect=False (disable exact=True, this is not present in older ffmpeg)
        else:
            self.array( bbc.crop(self.array()), copy=False )  # crop first, in-place, valid pixels only
            if zeropad and bb != bbc:
                ((dyb, dya), (dxb, dxa)) = ((max(0, int(abs(np.ceil(bb.ymin() - bbc.ymin())))), max(0, int(abs(np.ceil(bb.ymax() - bbc.ymax()))))),
                                            (max(0, int(abs(np.ceil(bb.xmin() - bbc.xmin())))), max(0, int(abs(np.ceil(bb.xmax() - bbc.xmax()))))))
                self._array = np.pad(self.load().array(), ((0,0), (dyb, dya), (dxb, dxa), (0, 0)), mode='constant')
        self.shape(shape=(bb.height(), bb.width()))  # manually set shape
        return self
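    # Sketch: cropping with a box that extends past the frame boundary; the overhang
    # is zero padded when zeropad=True (illustrative coordinates):
    #   bb = vipy.geometry.BoundingBox(xmin=-16, ymin=0, width=256, height=256)
    #   v = v.crop(bb, zeropad=True)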
    def pkl(self, pklfile=None):
        """Save the object to a pickle file and return the object, useful for intermediate saving in long fluent chains"""
        pklfile = pklfile if pklfile is not None else toextension(self.filename(), '.pkl')
        remkdir(filepath(pklfile))
        vipy.util.save(self, pklfile)
        return self

    def pklif(self, b, pklfile=None):
        """Save the object to the provided pickle file only if b=True.  Useful for conditional intermediate saving in long fluent chains"""
        assert isinstance(b, bool)
        return self.pkl(pklfile) if b else self

    def webp(self, outfile=None, pause=3, strict=True, smallest=False, smaller=False, framerate=None):
        """Save a video to an animated WEBP file, with pause=N seconds on the last frame between loops.

        Args:
            strict: If true, assert that the filename must have a .webp extension
            pause: Integer seconds to pause between loops of the animation
            smallest: If true, create the smallest possible file, which takes much longer to run
            smaller: If true, create a smaller file, which takes a little longer to run
            framerate: [float] The output framerate of the webp file.  The default is the framerate of the video.

        Returns:
            The filename of the webp file for this video

        .. warning:: This may be slow for very long or large videos
        """
        outfile = vipy.util.tempWEBP() if outfile is None else outfile
        assert strict is False or iswebp(outfile)
        outfile = os.path.normpath(os.path.abspath(os.path.expanduser(outfile)))
        framerate = framerate if framerate is not None else self._framerate
        self.load().frame(0).pil().save(outfile, loop=0, save_all=True, method=6 if smallest else 3 if smaller else 0,
                                        append_images=[self.frame(k).pil() for k in range(1, len(self))],
                                        duration=[int(1000.0/framerate) for k in range(0, len(self)-1)] + [pause*1000])
        return outfile

    def gif(self, outfile, pause=3, smallest=False, smaller=False, framerate=None):
        """Save a video to an animated GIF file, with pause=N seconds between loops.

        Args:
            pause: Integer seconds to pause between loops of the animation
            smallest: If true, create the smallest possible file, which takes much longer to run
            smaller: If true, create a smaller file, which takes a little longer to run
            framerate: [float] The output framerate of the GIF file.  The default is the framerate of the video.

        Returns:
            The filename of the animated GIF of this video

        .. warning:: This will be very large for big videos, consider using `vipy.video.Video.webp` instead.
        """
        assert isgif(outfile)
        return self.webp(outfile, pause, strict=False, smallest=smallest, smaller=True, framerate=framerate)
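    # Sketch: exporting short animated previews (illustrative output paths):
    #   v.webp('/tmp/preview.webp', pause=2, smaller=True)
    #   v.gif('/tmp/preview.gif', pause=2)    # delegates to webp() with strict=False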
""" assert isgif(outfile) return self.webp(outfile, pause, strict=False, smallest=smallest, smaller=True, framerate=framerate) def saveas(self, outfile=None, framerate=None, vcodec='libx264', verbose=False, ignoreErrors=False, flush=False, pause=5): """Save video to new output video file. This function does not draw boxes, it saves pixels to a new video file. Args: outfile: the absolute path to the output video file. This extension can be .mp4 (for video) or [".webp",".gif"] (for animated image) ignoreErrors: if True, then exit gracefully without throwing an exception. Useful for chaining download().saveas() on parallel dataset downloads flush: If true, then flush the buffer for this object right after saving the new video. This is useful for transcoding in parallel framerate: input framerate of the frames in the buffer, or the output framerate of the transcoded video. If not provided, use framerate of source video pause: an integer in seconds to pause between loops of animated images if the outfile is webp or animated gif Returns: a new video object with this video filename, and a clean video filter chain .. note:: - If self.array() is loaded, then export the contents of self._array to the video file - If self.array() is not loaded, and there exists a valid video file, apply the filter chain directly to the input video - If outfile==None or outfile==self.filename(), then overwrite the current filename """ outfile = tocache(tempMP4()) if outfile is None else os.path.normpath(os.path.abspath(os.path.expanduser(outfile))) premkdir(outfile) # create output directory for this file if not exists framerate = framerate if framerate is not None else self._framerate assert vipy.util.isvideofile(outfile), "Invalid filename extension for video filename" if verbose: print('[vipy.video.saveas]: Saving video "%s" ...' 
    def savetmp(self):
        """Call `vipy.video.Video.saveas` using a new temporary video file, and return the video object with this new filename"""
        return self.saveas(outfile=tempMP4())

    def savetemp(self):
        """Alias for `vipy.video.Video.savetmp`"""
        return self.savetmp()

    def tocache(self):
        """Call `vipy.video.Video.saveas` using a new temporary video file in the VIPY cache, and return the video object with this new filename"""
        return self.saveas(outfile=vipy.util.tocache(tempMP4()))

    def ffplay(self):
        """Play the video file using ffplay"""
        assert self.hasfilename() or (self.hasurl() and self.download().hasfilename())  # triggers download if needed
        cmd = 'ffplay "%s"' % self.filename()
        print('[vipy.video.play]: Executing "%s"' % cmd)
        os.system(cmd)
        return self

    def play(self, verbose=False, notebook=False, ffplay=True):
        """Play the saved video filename in self.filename()

        If there is no filename, try to download it.  If the filter chain is dirty or the pixels are loaded, dump to a temp video file first then play it.  This uses 'ffplay' on the PATH if available, otherwise uses a fallback player by showing a sequence of matplotlib frames.

        If the output of the ffmpeg filter chain has modified this video, then this will be saved to a temporary video file.  To play the original video (independent of the filter chain of this video), use `vipy.video.Video.ffplay`.

        Args:
            verbose: If true, show more verbose output
            notebook: If true, play in a jupyter notebook
            ffplay: If true, use ffplay to display the video (if available)

        Returns:
            The unmodified video object
        """
        if not self.isdownloaded() and self.hasurl():
            self.download()
        if not self.isloaded() and (iswebp(self.filename()) or isgif(self.filename())):
            self.load()

        if notebook:
            # save to temporary video, this video is not cleaned up and may accumulate
            try_import("IPython.display", "ipython"); import IPython.display
            if not self.hasfilename() or self.isloaded() or self._isdirty():
                v = self.saveas(tempMP4())
                warnings.warn('Saving video to temporary file "%s" for notebook viewer ... ' % v.filename())
                return IPython.display.Video(v.filename(), embed=True)
            return IPython.display.Video(self.filename(), embed=True)
        elif ffplay and has_ffplay:
            if self.isloaded() or self._isdirty():
                f = tempMP4()
                if verbose:
                    warnings.warn('%s - Saving video to temporary file "%s" for ffplay ... ' % ('Video loaded into memory' if self.isloaded() else 'Dirty FFMPEG filter chain', f))
                v = self.saveas(f)
                cmd = 'ffplay "%s"' % v.filename()
                if verbose:
                    print('[vipy.video.play]: Executing "%s"' % cmd)
                os.system(cmd)
                if verbose:
                    print('[vipy.video.play]: Removing temporary file "%s"' % v.filename())
                os.remove(v.filename())  # cleanup
            elif self.hasfilename() or (self.hasurl() and self.download().hasfilename()):  # triggers download
                self.ffplay()
            else:
                raise ValueError('Invalid video file "%s" - ffplay requires a video filename' % self.filename())
            return self
        else:
            # Fallback player.  This can visualize videos without ffplay, but it cannot guarantee frame rates.  Large videos with complex scenes will slow this down and will render at lower frame rates.
            fps = self.framerate()
            assert fps > 0, "Invalid display framerate"
            with Stopwatch() as sw:
                for (k,im) in enumerate(self.load() if self.isloaded() else self.stream()):
                    time.sleep(max(0, (1.0/self.framerate())*int(np.ceil((self.framerate()/fps))) - sw.since()))
                    im.show(figure='video')
                    if vipy.globals._user_hit_escape():
                        break
            vipy.show.close('video')
            return self

    def show(self, verbose=False, notebook=False, ffplay=False):
        """Alias for play"""
        return self.play(verbose=verbose, notebook=notebook, ffplay=ffplay)
    def quicklook(self, n=9, mindim=256, startframe=0, animate=False, dt=30, thumbnail=None):
        """Generate a montage of n uniformly spaced frames.

        Montage increases rowwise for n uniformly spaced frames, starting from frame zero and ending on the last frame.

        Args:
            n: Number of images in the quicklook
            mindim: The minimum dimension of each of the elements in the montage
            animate: If true, return a video constructed by animating the quicklook into a video by showing dt consecutive frames
            dt: The number of frames for animation
            startframe: The initial frame index to start the n uniformly sampled frames for the quicklook
            thumbnail: [`vipy.image.Image`] If provided, prepend the first element in the montage with this thumbnail.  This is useful for showing a high resolution image (e.g. a face, small object) to be contained in the video for review.

        .. note:: The first frame in the upper left is guaranteed to be the start frame of the labeled activity, but the last frame in the bottom right may not be precisely the end frame and may be off by at most len(video)/9.
        """
        if not self.isloaded():
            self.load()
        if animate:
            return Video(frames=[self.quicklook(n=n, startframe=k, animate=False, dt=dt) for k in range(0, min(dt, len(self)))], framerate=self.framerate())
        framelist = [min(int(np.round(f))+startframe, len(self)-1) for f in np.linspace(0, len(self)-1, n)]
        imframes = [self.frame(k).maxmatte() for (j,k) in enumerate(framelist)]  # letterbox or pillarbox
        imframes = [im.savefig(figure=1).rgb() for im in imframes]  # temp storage in memory
        if thumbnail is not None:
            assert isinstance(thumbnail, vipy.image.Image)
            imframes = [thumbnail.maxmatte().mindim(mindim)] + imframes  # prepend
        return vipy.visualize.montage(imframes, imgwidth=mindim, imgheight=mindim)
    def torch(self, startframe=0, endframe=None, length=None, stride=1, take=None, boundary='repeat', order='nchw', verbose=False, withslice=False, scale=1.0, withlabel=False, nonelabel=False):
        """Convert the loaded video of shape NxHxWxC frames to an MxCxHxW torch tensor.

        Args:
            startframe: [int >= 0] The start frame of the loaded video to use for constructing the torch tensor
            endframe: [int >= 0] The end frame of the loaded video to use for constructing the torch tensor
            length: [int >= 0] The length of the torch tensor if endframe is not provided.
            stride: [int >= 1] The temporal stride in frames.  This is the number of frames to skip.
            take: [int >= 0] The number of uniformly spaced frames to include in the tensor.
            boundary: ['repeat', 'cyclic'] The boundary handling for when the requested tensor slice goes beyond the end of the video
            order: ['nchw', 'nhwc', 'chwn', 'cnhw'] The axis ordering of the returned torch tensor, where N=number of frames (batchsize), C=channels, H=height, W=width
            verbose: [bool] Print out the slice used for constructing the tensor
            withslice: [bool] Return a tuple (tensor, slice) that includes the slice used to construct the tensor.  Useful for data provenance.
            scale: [float] An optional scale factor to apply to the tensor.  Useful for converting [0,255] -> [0,1]
            withlabel: [bool] Return a tuple (tensor, labels) that includes the N framewise activity labels.
            nonelabel: [bool] Return the tuple (t, None) if withlabel=False

        Returns:
            Returns a torch float tensor, analogous to torchvision.transforms.ToTensor()
            Returns (tensor, slice) if withslice=True (withslice takes precedence)
            Returns (tensor, labellist) if withlabel=True

        .. notes::
            - This triggers a load() of the video
            - The precedence of arguments is (startframe, endframe) or (startframe, startframe+length), then stride and take.
            - Follows numpy slicing rules.  Optionally return the slice used if withslice=True
        """
        try_import('torch'); import torch

        frames = self.load().numpy() if self.load().numpy().ndim == 4 else np.expand_dims(self.load().numpy(), 3)  # NxHxWx(C=1, C=3)
        assert boundary in ['repeat', 'strict', 'cyclic'], "Invalid boundary mode - must be in ['repeat', 'strict', 'cyclic']"

        # Slice index (i=start (zero-indexed), j=end (non-inclusive), k=step)
        (i,j,k) = (startframe, endframe, stride)
        if startframe == 'random':
            assert length is not None, "Random start frame requires fixed length"
            i = max(0, np.random.randint(len(frames)-length+1))
        if endframe is not None:
            assert length is None, "Cannot specify both endframe and length"
            assert endframe > startframe, "End frame must be greater than start frame"
            (j,k) = (endframe, 1)
        if length is not None:
            assert endframe is None, "Cannot specify both endframe and length"
            assert length >= 0, "Length must be positive"
            (j,k) = (i+length, 1)
        if length is None and endframe is None:
            j = len(frames)  # use them all
        if stride != 1:
            assert take is None, "Cannot specify both take and stride"
            assert stride >= 1, "Stride must be >= 1"
            k = stride
        if take is not None:
            # Uniformly sampled frames to result in len(frames)=take
            assert stride == 1, "Cannot specify both take and stride"
            assert take <= len(frames), "Take must be less than the number of frames"
            k = int(np.ceil(len(frames)/float(take)))

        # Boundary handling
        assert i >= 0, "Start frame must be >= 0"
        assert i < j, "Start frame must be less than end frame"
        assert k <= len(frames), "Stride must be <= len(frames)"
        n = len(frames)  # true video length for labels
        if boundary == 'repeat' and j > len(frames):
            for d in range(j-len(frames)):
                frames = np.concatenate( (frames, np.expand_dims(frames[-1], 0) ))
        elif boundary == 'cyclic' and j > len(frames):
            for d in range(j-len(frames)):
                frames = np.concatenate( (frames, np.expand_dims(frames[j % len(frames)], 0) ))
        assert j <= len(frames), "invalid slice=%s for frame shape=%s" % (str((i,j,k)), str(frames.shape))
        if verbose:
            print('[vipy.video.torch]: slice (start,end,step)=%s for frame shape (N,C,H,W)=%s' % (str((i,j,k)), str(frames.shape)))

        # Slice and transpose to torch tensor axis ordering
        t = torch.from_numpy(frames[i:j:k] if (k!=1 or i!=0 or j!=len(frames)) else frames)  # do not copy - This shares the numpy buffer of the video, be careful!
        if t.dim() == 2:
            t = t.unsqueeze(0).unsqueeze(-1)  # HxW -> (N=1)xHxWx(C=1)
        if order == 'nchw':
            t = t.permute(0,3,1,2)  # NxCxHxW, view
        elif order == 'nhwc':
            pass  # NxHxWxC (native numpy order)
        elif order == 'cnhw' or order == 'cdhw':
            t = t.permute(3,0,1,2)  # CxNxHxW == CxDxHxW (for torch conv3d), view
        elif order == 'chwn':
            t = t.permute(3,1,2,0)  # CxHxWxN, view
        else:
            raise ValueError("Invalid order - must be in ['nchw', 'nhwc', 'chwn', 'cnhw']")

        # Scaling (optional)
        if scale is not None and self.colorspace() != 'float':
            t = (1.0/255.0)*t  # [0,255] -> [0,1]
        elif scale is not None and scale != 1.0:
            t = scale*t

        # Return tensor or (tensor, slice) or (tensor, labels)
        if withslice:
            return (t, (i,j,k))
        elif withlabel:
            labels = [sorted(tuple(self.activitylabels( (f%n) if boundary == 'cyclic' else min(f, n-1) ))) for f in range(i,j,k)]
            return (t, labels)
        elif nonelabel:
            return (t, None)
        else:
            return t
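    # Sketch: creating a fixed-length training tensor with provenance (requires
    # torch; illustrative parameters):
    #   (t, s) = v.torch(startframe='random', length=16, boundary='repeat', withslice=True)
    #   t.shape   # torch.Size([16, C, H, W]) for the default order='nchw'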
    def clone(self, flushforward=False, flushbackward=False, flush=False, flushfilter=False, rekey=False, flushfile=False, shallow=False, sharedarray=False, sanitize=True):
        """Create a deep copy of the video object, flushing the original buffer if requested and returning the cloned object.

        Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned object which can be used for encoding and will be garbage collected.

        Args:
            flushforward: copy the object, and set the cloned object `vipy.video.Video.array` to None.  This flushes the video buffer for the clone, not the object
            flushbackward: copy the object, and set the object array() to None.  This flushes the video buffer for the object, not the clone.
            flush: set the object array() to None and clone the object.  This flushes the video buffer for both the clone and the object.
            flushfilter: Set the ffmpeg filter chain to the default in the new object, useful for saving new videos
            flushfile: Remove the filename and the URL from the video object.  Useful for creating new video objects from loaded pixels.
            rekey: Generate new unique track ID and activity ID keys for this scene
            shallow: shallow copy everything (copy by reference), except for the ffmpeg object.  The attributes dictionary is shallow copied
            sharedarray: deep copy of everything, except for the pixel buffer which is shared.  Changing the pixel buffer on self is reflected in the clone.
            sanitize: remove private attributes from the self.attributes dictionary.  A private attribute is any key with two leading underscores '__' which should not be propagated to the clone

        Returns:
            A deepcopy of the video object such that changes to self are not reflected in the copy

        .. note:: Cloning videos is an expensive operation and can slow down real time code.  Use sparingly.
        """
        if sanitize:
            a = self.attributes  # copy reference to attributes to restore
            self.attributes = {}  # remove attributes on self for fast clone() since private attributes will be filtered anyway

        if flush or (flushforward and flushbackward):
            self._array = None  # flushes buffer on object and clone
            #self._previewhash = None
            self._shape = None
            v = copy.deepcopy(self)  # object and clone are flushed
        elif flushbackward:
            v = copy.deepcopy(self)  # propagates _array to clone
            self._array = None  # object flushed, clone not flushed
            #self._previewhash = None
            self._shape = None
        elif flushforward:
            array = self._array
            self._array = None
            #self._previewhash = None
            self._shape = None
            v = copy.deepcopy(self)  # does not propagate _array to clone
            self._array = array  # object not flushed
            v._array = None  # clone flushed
        elif shallow:
            v = copy.copy(self)  # shallow copy
            v._ffmpeg = copy.deepcopy(self._ffmpeg)  # except for ffmpeg object
            v.attributes = {k:v for (k,v) in self.attributes.items()}  # shallow copy of keys
            v._array = np.asarray(self._array) if self._array is not None else None  # shared pixels
        elif sharedarray:
            array = self._array
            self._array = None
            v = copy.deepcopy(self)  # deep copy of everything but pixels
            v._array = np.asarray(array) if array is not None else None  # shared pixels
            self._array = array  # restore
        else:
            v = copy.deepcopy(self)

        if flushfilter:
            v._ffmpeg = ffmpeg.input(v.filename())  # no other filters
            #v._previewhash = None
            v._shape = None
            (v._startframe, v._endframe) = (None, None)
            (v._startsec, v._endsec) = (None, None)
        if rekey:
            v.rekey()
        if flushfile:
            v.nofilename().nourl()
        if sanitize:
            self.attributes = a  # restore attributes
            v.attributes = {k:v for (k,v) in self.attributes.items()}  # shallow copy
            v.sanitize()  # remove private attributes
        return v

    def flush(self):
        """Alias for clone(flush=True), returns self not clone"""
        self._array = None  # flushes buffer on object and clone
        self.delattribute("__stream_buffer")  # remove if present
        #self._previewhash = None
        self._shape = None
        return self
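    # Sketch: the distributed-encode pattern described in the clone() docstring —
    # hand off a flushed clone while keeping (or releasing) pixels locally:
    #   vc = v.clone(flushforward=True)    # clone has no pixel buffer, v keeps pixels
    #   v2 = v.clone(flushbackward=True)   # clone keeps pixels, v is flushed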
    def returns(self, r=None):
        """Return the provided value, useful for terminating long fluent chains without returning self"""
        return r

    def flush_and_return(self, retval):
        """Flush the video and return the parameter supplied, useful for long fluent chains"""
        self.flush()
        return retval

    def map(self, func):
        """Apply lambda function to the loaded numpy array img, changes pixels not shape

        The lambda function must have the following signature:

        * newimg = func(img)
        * img: HxWxC numpy array for a single frame of video
        * newimg: HxWxC modified numpy array for this frame.  Change only the pixels, not the shape

        The lambda function will be applied to every frame in the video in frame index order.
        """
        assert isinstance(func, types.LambdaType), "Input must be a lambda function with np.array() input and np.array() output"
        oldimgs = self.load().array()
        self.array(np.apply_along_axis(func, 0, self._array))  # FIXME: in-place operation?
        if (any([oldimg.dtype != newimg.dtype for (oldimg, newimg) in zip(oldimgs, self.array())]) or
            any([oldimg.shape != newimg.shape for (oldimg, newimg) in zip(oldimgs, self.array())])):
            self.colorspace('float')  # unknown colorspace after shape or type transformation, set generic
        return self
returns(self, r=None): """Return the provided value, useful for terminating long fluent chains without returning self""" return r def flush_and_return(self, retval): """Flush the video and return the parameter supplied, useful for long fluent chains""" self.flush() return retval def map(self, func): """Apply lambda function to the loaded numpy array img, changes pixels not shape Lambda function must have the following signature: * newimg = func(img) * img: HxWxC numpy array for a single frame of video * newimg: HxWxC modified numpy array for this frame. Change only the pixels, not the shape The lambda function will be applied to every frame in the video in frame index order. """ assert isinstance(func, types.LambdaType), "Input must be lambda function with np.array() input and np.array() output" oldimgs = self.load().array() self.array(np.apply_along_axis(func, 0, self._array)) # FIXME: in-place operation? if (any([oldimg.dtype != newimg.dtype for (oldimg, newimg) in zip(oldimgs, self.array())]) or any([oldimg.shape != newimg.shape for (oldimg, newimg) in zip(oldimgs, self.array())])): self.colorspace('float') # unknown colorspace after shape or type transformation, set generic return self def gain(self, g): """Pixelwise multiplicative gain, such that each pixel p_{ij} = g * p_{ij}""" return self.normalize(0, 1, scale=g) def bias(self, b): """Pixelwise additive bias, such that each pixel p_{ij} = b + p_{ij}""" return self.normalize(mean=0, std=1, scale=1.0, bias=b) def float(self): self.load() self._array = self._array.astype(np.float32) if self._array is not None else self._array return self def channel(self, c): self.load() assert c >= 0 and c < self.channels() self._array = self._array[:,:,:,c] if self._array is not None else self._array return self def normalize(self, mean, std, scale=1, bias=0): """Pixelwise whitening, out = ((scale*in) - mean) / std); triggers load(). All computations float32""" assert scale >= 0, "Invalid input" assert all([s > 0 for s in tolist(std)]), "Invalid input" self._array = vipy.math.normalize(self._array, np.array(mean, dtype=np.float32), np.array(std, dtype=np.float32), np.float32(scale)) if bias != 0: self._array = self._array + np.array(bias, dtype=np.float32) return self.colorspace('float') def setattribute(self, k, v=None): if self.attributes is None: self.attributes = {} self.attributes[k] = v return self def _has_private_attribute(self): """Does the attributes dictionary contain any private attributes (e.g. those keys prepended with '__')""" return isinstance(self.attributes, dict) and any([k.startswith('__') for k in self.attributes.keys()]) def hasattribute(self, k): """Does the attributes dictionary (self.attributes) contain the provided key""" return isinstance(self.attributes, dict) and k in self.attributes def delattribute(self, k): if k in self.attributes: self.attributes.pop(k) return self def clearattributes(self): """Remove all attributes""" self.attributes = {} return self def getattribute(self, k): """Return the key k in the attributes dictionary (self.attributes) if present, else None""" return self.attributes[k] if k in self.attributes else None
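The tail of the source listing above implements the torch() tensor export. A minimal usage sketch, assuming the torch package is installed and a small three channel random video (shapes shown are illustrative):

```python
import vipy.video

v = vipy.video.RandomVideo(64, 64, 16)         # 16 frame RGB video
t = v.torch()                                  # tensor of shape (N=16, C=3, H=64, W=64) with the default order='nchw'
(t8, s) = v.torch(length=8, withslice=True)    # first eight frames, plus the (start, end, step) slice used
ts = v.torch(scale=1.0/255.0)                  # any non-None scale maps uint8 [0,255] -> [0,1]
```

Note that the returned tensor may share the numpy buffer of the video, so mutate it with care.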
Subclasses
Static methods
def cast(v)
-
Cast a conformal video object to a Video object. This is useful for downcasting superclasses.
vs = vipy.video.RandomScene()
v = vipy.video.Video.cast(vs)
Expand source code Browse git
@classmethod
def cast(cls, v):
    """Cast a conformal video object to a `vipy.video.Video` object. This is useful for downcasting superclasses.

    ```python
    vs = vipy.video.RandomScene()
    v = vipy.video.Video.cast(vs)
    ```
    """
    assert isinstance(v, vipy.video.Video), "Invalid input - must be derived from vipy.video.Video"
    v.__class__ = vipy.video.Video
    return v
def concatenate(videos, outfile, framerate=30, youtube_chapters=None)
-
Temporally concatenate a sequence of videos into a single video stored in outfile.
(v1, v2, v3) = (vipy.video.RandomVideo(128,128,32), vipy.video.RandomVideo(128,128,32), vipy.video.RandomVideo(128,128,32))
vc = vipy.video.Video.concatenate((v1, v2, v3), 'concatenated.mp4', youtube_chapters=lambda v: v.category())
In this example, vc will point to concatenated.mp4, which will contain (v1,v2,v3) concatenated temporally.
Args
videos
- a single video or an iterable of videos of type Video, or an iterable of video files
outfile
- the output filename to store the concatenation.
youtube_chapters
- [bool, callable]: If true, output a string that can be used to define the start and end times of chapters if this video is uploaded to YouTube. The string output should be copied into the YouTube video description to enable chapters on playback. This argument defaults to the string representation of the video, but you may also pass a callable of the form 'youtube_chapters=lambda v: str(v)' which will output the provided string for each video chapter. A useful lambda is 'youtube_chapters=lambda v: v.category()'
framerate
- [float]: The output frame rate of outfile
Returns
A Video object with filename()=outfile, such that outfile contains the temporal concatenation of pixels in (self, videos).
Note
- self will not be modified; this will return a new Video object.
- All videos must be the same shape(). If the videos are different shapes, you must pad them to a common size equal to self.shape(). Try Video.zeropadlike().
- The output video will be at the framerate of self.framerate().
- If you want to concatenate annotations, call Scene.annotate() first on the videos to save the annotations into the pixels, then concatenate.
Expand source code Browse git
@classmethod
def concatenate(cls, videos, outfile, framerate=30, youtube_chapters=None):
    """Temporally concatenate a sequence of videos into a single video stored in outfile.

    ```python
    (v1, v2, v3) = (vipy.video.RandomVideo(128,128,32), vipy.video.RandomVideo(128,128,32), vipy.video.RandomVideo(128,128,32))
    vc = vipy.video.Video.concatenate((v1, v2, v3), 'concatenated.mp4', youtube_chapters=lambda v: v.category())
    ```

    In this example, vc will point to concatenated.mp4, which will contain (v1,v2,v3) concatenated temporally.

    Args:
        videos: a single video or an iterable of videos of type `vipy.video.Video`, or an iterable of video files
        outfile: the output filename to store the concatenation
        youtube_chapters: [bool, callable] If true, output a string that can be used to define the start and end times of chapters if this video is uploaded to YouTube. The string output should be copied into the YouTube video description to enable chapters on playback. This argument defaults to the string representation of the video, but you may also pass a callable of the form 'youtube_chapters=lambda v: str(v)' which will output the provided string for each video chapter. A useful lambda is 'youtube_chapters=lambda v: v.category()'
        framerate: [float] The output frame rate of outfile

    Returns:
        A `vipy.video.Video` object with filename()=outfile, such that outfile contains the temporal concatenation of pixels in (self, videos).

    .. note::
        - self will not be modified; this will return a new `vipy.video.Video` object.
        - All videos must be the same shape(). If the videos are different shapes, you must pad them to a common size equal to self.shape(). Try `vipy.video.Video.zeropadlike`.
        - The output video will be at the framerate of self.framerate().
        - If you want to concatenate annotations, call `vipy.video.Scene.annotate` first on the videos to save the annotations into the pixels, then concatenate.
    """
    assert len(tolist(videos)) > 0 and (all([isinstance(v, vipy.video.Video) for v in tolist(videos)]) or all([os.path.exists(f) and vipy.util.isvideofile(f) for f in tolist(videos)]))
    vi = tolist(videos) if all([isinstance(v, vipy.video.Video) for v in tolist(videos)]) else [cls(filename=f) for f in tolist(videos)]
    assert all([vij.shape() == vik.shape() for vij in vi for vik in vi]), "Video shapes must all be the same, try padding"
    vo = cls(filename=outfile, framerate=vi[0].framerate())
    with vo.stream(overwrite=True) as s:
        for v in vi:
            for im in v.clone().framerate(framerate).stream():
                s.write(im)
    if youtube_chapters is not None:
        f = youtube_chapters if callable(youtube_chapters) else lambda v: str(v).replace('<','').replace('>','')  # angle brackets not allowed
        print('[vipy.video.concatenate]: Copy the following into the video Description after uploading the videofile "%s" to YouTube to enable chapters on playback.\n' % outfile)
        print('\n'.join(['%s %s' % (vipy.util.seconds_to_MMSS_colon_notation(int(s)), str(f(v))) for (s,v) in zip(np.cumsum([0] + [v.duration() for v in vi][:-1]), vi)])); print('\n')
        if any([v.duration() < 10 for v in vi]):
            warnings.warn('YouTube chapters must be a minimum duration of 10 seconds')
    return vo
def from_json(s)
-
Import a json string as a Video object. This will perform a round trip from a video to json and back to a video object. This same operation is used for serialization of all vipy objects to JSON for storage.
v = vipy.video.Video.from_json(vipy.video.RandomVideo().json())
Expand source code Browse git
@classmethod
def from_json(cls, s):
    """Import a json string as a `vipy.video.Video` object.

    This will perform a round trip from a video to json and back to a video object. This same operation is used for serialization of all vipy objects to JSON for storage.

    ```python
    v = vipy.video.Video.from_json(vipy.video.RandomVideo().json())
    ```
    """
    d = json.loads(s) if not isinstance(s, dict) else s
    d = {k.lstrip('_'):v for (k,v) in d.items()}  # prettyjson (remove "_" prefix to attributes)
    v = cls(filename=d['filename'],
            url=d['url'],
            framerate=d['framerate'],
            array=np.array(d['array']) if d['array'] is not None else None,
            colorspace=d['colorspace'],
            attributes=d['attributes'],
            startframe=d['startframe'],
            endframe=d['endframe'],
            startsec=d['startsec'],
            endsec=d['endsec'])
    v._ffmpeg = v._from_ffmpeg_commandline(d['ffmpeg'])
    return v.filename(d['filename']) if d['filename'] is not None else v.nofilename()
Methods
def abspath(self)
-
Change the path of the filename from a relative path to an absolute path (not relocatable)
Expand source code Browse git
def abspath(self):
    """Change the path of the filename from a relative path to an absolute path (not relocatable)"""
    return self.filename(os.path.normpath(os.path.abspath(os.path.expanduser(self.filename()))))
def array(self, array=None, copy=False)
-
Set or return the video buffer as a numpy array.
Args
array
- [np.array] A numpy array of size NxHxWxC = (frames, height, width, channels) of type uint8 or float32.
copy
- [bool] If true, copy the buffer by value instead of by reference. Copied buffers do not share pixels.
Returns
if array=None, return a reference to the pixel buffer as a numpy array, otherwise return the video object.
Expand source code Browse git
def array(self, array=None, copy=False):
    """Set or return the video buffer as a numpy array.

    Args:
        array: [np.array] A numpy array of size NxHxWxC = (frames, height, width, channels) of type uint8 or float32.
        copy: [bool] If true, copy the buffer by value instead of by reference. Copied buffers do not share pixels.

    Returns:
        if array=None, return a reference to the pixel buffer as a numpy array, otherwise return the video object.
    """
    if array is None:
        return self._array
    elif isnumpy(array):
        assert array.dtype == np.float32 or array.dtype == np.uint8, "Invalid input - array() must be type uint8 or float32"
        assert array.ndim == 4, "Invalid input - array() must be of shape NxHxWxC, for N frames of size HxW with C channels"
        self._array = np.copy(array) if copy else array
        if copy:
            self._array.setflags(write=True)  # mutable iterators, triggers copy
        self.colorspace(None)  # must be set with colorspace() after array() before _convert()
        return self
    else:
        raise ValueError('Invalid input - array() must be numpy array')
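A minimal usage sketch; the buffer shape below is an illustrative assumption, and the array= constructor keyword is the same one used by from_json() above:

```python
import numpy as np
import vipy.video

buf = np.random.randint(0, 256, (8, 64, 64, 3), dtype=np.uint8)  # NxHxWxC uint8
v = vipy.video.Video(array=buf, colorspace='rgb')                # wrap the buffer by reference
assert v.array().shape == (8, 64, 64, 3)                         # getter returns the buffer
v.array(buf, copy=True).colorspace('rgb')                        # setter by value; re-set colorspace after array()
```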
def aspect_ratio(self)
-
The width/height of the video expressed as a fraction
Expand source code Browse git
def aspect_ratio(self):
    """The width/height of the video expressed as a fraction"""
    return self.width() / self.height()
def bias(self, b)
-
Pixelwise additive bias, such that each pixel p_{ij} = b + p_{ij}
Expand source code Browse git
def bias(self, b):
    """Pixelwise additive bias, such that each pixel p_{ij} = b + p_{ij}"""
    return self.normalize(mean=0, std=1, scale=1.0, bias=b)
def bytes(self)
-
Return a bytes representation of the video file
Expand source code Browse git
def bytes(self):
    """Return a bytes representation of the video file"""
    assert self.hasfilename(), "Invalid filename"
    with open(self.filename(), 'rb') as f:
        data = io.BytesIO(f.read())
    return str(data.read()).encode('UTF-8')
def canload(self, frame=0)
-
Return True if the video can be previewed at frame=k successfully.
This is useful for filtering bad videos or filtering videos that cannot be loaded using your current FFMPEG version.
Notes: This will only try to preview a single frame. This will not check if the entire video is loadable; use Video.isloadable() in that case. This will hang if calling canload() on a streaming URL.
Expand source code Browse git
def canload(self, frame=0):
    """Return True if the video can be previewed at frame=k successfully.

    This is useful for filtering bad videos or filtering videos that cannot be loaded using your current FFMPEG version.

    .. notes:: This will only try to preview a single frame. This will not check if the entire video is loadable; use `vipy.video.Video.isloadable` in that case. This will hang if calling canload() on a streaming URL.
    """
    if not self.isloaded():
        try:
            self.preview(framenum=frame)  # try to preview
            return True
        except:
            return False
    else:
        return True
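For example, a minimal filtering sketch (the file paths are hypothetical):

```python
import vipy.video

files = ['/path/to/a.mp4', '/path/to/b.mp4']            # hypothetical video files
videos = [vipy.video.Video(filename=f) for f in files]
good = [v for v in videos if v.canload()]               # previews a single frame per video, no full load
```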
def centercrop(self, shape, withbox=False)
-
Crop the video to shape=(H,W) preserving the integer centroid position, and optionally return the box
Expand source code Browse git
def centercrop(self, shape, withbox=False):
    """Crop the video to shape=(H,W) preserving the integer centroid position, and optionally return the box"""
    assert shape[0] <= self.height() and shape[1] <= self.width()  # triggers preview()
    bb = vipy.geometry.BoundingBox(xcentroid=float(self.width()/2.0), ycentroid=float(self.height()/2.0), width=float(shape[1]), height=float(shape[0])).int()  # may be outside frame
    self.crop(bb, zeropad=True)
    return self if not withbox else (self, bb)
def centersquare(self)
-
Crop video of size (NxN) in the center, such that N=min(width,height), keeping the video centroid constant
Expand source code Browse git
def centersquare(self):
    """Crop video of size (NxN) in the center, such that N=min(width,height), keeping the video centroid constant"""
    return self.centercrop((min(self.height(), self.width()), min(self.height(), self.width())))
def channel(self, c)
-
Expand source code Browse git
def channel(self, c):
    self.load()
    assert c >= 0 and c < self.channels()
    self._array = self._array[:,:,:,c] if self._array is not None else self._array
    return self
def channels(self)
-
Return integer number of color channels
Expand source code Browse git
def channels(self):
    """Return integer number of color channels"""
    if not self.isloaded():
        self._channels = 3  # always color video
        #previewhash = hashlib.md5(str(self._ffmpeg_commandline()).encode()).hexdigest()
        #if not hasattr(self, '_previewhash') or previewhash != self._previewhash:
        #    im = self.preview()  # ffmpeg chain changed, load a single frame of video
        #    self._shape = (im.height(), im.width())  # cache the shape
        #    self._channels = im.channels()  # cache
        #    self._previewhash = previewhash
        return self._channels  # cached
    else:
        return 1 if self.load().array().ndim == 3 else self.load().array().shape[3]
def channelshape(self)
-
Return a tuple (channels, height, width) for the video
Expand source code Browse git
def channelshape(self):
    """Return a tuple (channels, height, width) for the video"""
    return (self.channels(), self.height(), self.width())
def clear(self)
-
Expand source code Browse git
def clear(self):
    """no-op for `vipy.video.Video` object, used only for `vipy.video.Scene`"""
    return self
def clearattributes(self)
-
Remove all attributes
Expand source code Browse git
def clearattributes(self):
    """Remove all attributes"""
    self.attributes = {}
    return self
def clip(self, startframe, endframe=None)
-
Load a video clip between start and end frames
Expand source code Browse git
def clip(self, startframe, endframe=None):
    """Load a video clip between start and end frames"""
    assert (endframe is None or startframe <= endframe) and startframe >= 0, "Invalid start and end frames (%s, %s)" % (str(startframe), str(endframe))
    if not self.isloaded():
        timestamp_in_seconds = ((self._startframe if self._startframe is not None else 0)+startframe)/float(self.framerate())
        self._update_ffmpeg_seek(timestamp_in_seconds)
        if endframe is not None:
            self._ffmpeg = self._ffmpeg.setpts('PTS-STARTPTS')  # reset timestamp to 0 before trim filter
            self._ffmpeg = self._ffmpeg.trim(start=0, end=(endframe-startframe)/self.framerate())  # must be in seconds to allow for framerate conversion
            self._ffmpeg = self._ffmpeg.setpts('PTS-STARTPTS')  # reset timestamp to 0 after trim filter
        self._startframe = startframe if self._startframe is None else self._startframe + startframe  # for __repr__ only
        self._endframe = (self._startframe + (endframe-startframe)) if endframe is not None else endframe  # for __repr__ only
    else:
        endframe = endframe if endframe is not None else len(self._array)
        self._array = self._array[startframe:endframe]
        (self._startframe, self._endframe) = (0, endframe-startframe)
    return self
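A minimal usage sketch (the filename is hypothetical):

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4')  # hypothetical file
v.clip(30, 90)    # plan a trim of frames [30, 90) in the ffmpeg filter chain
v.load()          # only the 60 frame clip is decoded into v.array()
```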
def cliprange(self)
-
Return the planned clip (startframe, endframe) range.
This is useful for introspection of the planned clip() before load(), such as for data augmentation purposes without triggering a load.
Returns
(startframe, endframe) of the video() such that after load(), the pixel buffer will contain frame=0 equivalent to startframe in the source video, and frame=endframe-startframe-1 equivalent to endframe in the source video. Returns (0, None) if the video does not have a clip() (e.g. clip() was never called, or the filter chain does not include a 'trim').
Notes: The endframe can be retrieved (inefficiently) using:
int(round(self.duration_in_frames_of_videofile() * (self.framerate() / self.framerate_of_videofile())))
Expand source code Browse git
def cliprange(self):
    """Return the planned clip (startframe, endframe) range.

    This is useful for introspection of the planned clip() before load(), such as for data augmentation purposes without triggering a load.

    Returns:
        (startframe, endframe) of the video() such that after load(), the pixel buffer will contain frame=0 equivalent to startframe in the source video, and frame=endframe-startframe-1 equivalent to endframe in the source video.
        (0, None) if the video does not have a clip() (e.g. clip() was never called, or the filter chain does not include a 'trim')

    .. notes:: The endframe can be retrieved (inefficiently) using:

    ```python
    int(round(self.duration_in_frames_of_videofile() * (self.framerate() / self.framerate_of_videofile())))
    ```
    """
    return (self._startframe if self._startframe is not None else 0, self._endframe)
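A minimal introspection sketch (the filename is hypothetical):

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4').clip(30, 90)  # hypothetical file
(startframe, endframe) = v.cliprange()  # (30, 90), without triggering a load or preview
```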
def clone(self, flushforward=False, flushbackward=False, flush=False, flushfilter=False, rekey=False, flushfile=False, shallow=False, sharedarray=False, sanitize=True)
-
Create deep copy of video object, flushing the original buffer if requested and returning the cloned object.
Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned object which can be used for encoding and will be garbage collected.
Args
flushforward
- copy the object, and set the cloned object Video.array() to None. This flushes the video buffer for the clone, not the object
flushbackward
- copy the object, and set the object array() to None. This flushes the video buffer for the object, not the clone.
flush
- set the object array() to None and clone the object. This flushes the video buffer for both the clone and the object.
flushfilter
- Set the ffmpeg filter chain to the default in the new object, useful for saving new videos
flushfile
- Remove the filename and the URL from the video object. Useful for creating new video objects from loaded pixels.
rekey
- Generate new unique track ID and activity ID keys for this scene
shallow
- shallow copy everything (copy by reference), except for ffmpeg object. attributes dictionary is shallow copied
sharedarray
- deep copy of everything, except for pixel buffer which is shared. Changing the pixel buffer on self is reflected in the clone.
sanitize
- remove private attributes from self.attributes dictionary. A private attribute is any key with two leading underscores '__' which should not be propagated to clone
Returns
A deepcopy of the video object such that changes to self are not reflected in the copy
Note: Cloning videos is an expensive operation and can slow down real time code. Use sparingly.
Expand source code Browse git
def clone(self, flushforward=False, flushbackward=False, flush=False, flushfilter=False, rekey=False, flushfile=False, shallow=False, sharedarray=False, sanitize=True):
    """Create deep copy of video object, flushing the original buffer if requested and returning the cloned object.

    Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned object which can be used for encoding and will be garbage collected.

    Args:
        flushforward: copy the object, and set the cloned object `vipy.video.Video.array` to None. This flushes the video buffer for the clone, not the object
        flushbackward: copy the object, and set the object array() to None. This flushes the video buffer for the object, not the clone.
        flush: set the object array() to None and clone the object. This flushes the video buffer for both the clone and the object.
        flushfilter: set the ffmpeg filter chain to the default in the new object, useful for saving new videos
        flushfile: remove the filename and the URL from the video object. Useful for creating new video objects from loaded pixels.
        rekey: generate new unique track ID and activity ID keys for this scene
        shallow: shallow copy everything (copy by reference), except for ffmpeg object. attributes dictionary is shallow copied
        sharedarray: deep copy of everything, except for pixel buffer which is shared. Changing the pixel buffer on self is reflected in the clone.
        sanitize: remove private attributes from self.attributes dictionary. A private attribute is any key with two leading underscores '__' which should not be propagated to the clone

    Returns:
        A deepcopy of the video object such that changes to self are not reflected in the copy

    .. note:: Cloning videos is an expensive operation and can slow down real-time code. Use sparingly.
    """
    if sanitize:
        a = self.attributes  # copy reference to attributes to restore
        self.attributes = {}  # remove attributes on self for fast clone() since private attributes will be filtered anyway
    if flush or (flushforward and flushbackward):
        self._array = None  # flushes buffer on object and clone
        #self._previewhash = None
        self._shape = None
        v = copy.deepcopy(self)  # object and clone are flushed
    elif flushbackward:
        v = copy.deepcopy(self)  # propagates _array to clone
        self._array = None  # object flushed, clone not flushed
        #self._previewhash = None
        self._shape = None
    elif flushforward:
        array = self._array; self._array = None
        #self._previewhash = None
        self._shape = None
        v = copy.deepcopy(self)  # does not propagate _array to clone
        self._array = array  # object not flushed
        v._array = None  # clone flushed
    elif shallow:
        v = copy.copy(self)  # shallow copy
        v._ffmpeg = copy.deepcopy(self._ffmpeg)  # except for ffmpeg object
        v.attributes = {k:v for (k,v) in self.attributes.items()}  # shallow copy of keys
        v._array = np.asarray(self._array) if self._array is not None else None  # shared pixels
    elif sharedarray:
        array = self._array
        self._array = None
        v = copy.deepcopy(self)  # deep copy of everything but pixels
        v._array = np.asarray(array) if array is not None else None  # shared pixels
        self._array = array  # restore
    else:
        v = copy.deepcopy(self)
    if flushfilter:
        v._ffmpeg = ffmpeg.input(v.filename())  # no other filters
        #v._previewhash = None
        v._shape = None
        (v._startframe, v._endframe) = (None, None)
        (v._startsec, v._endsec) = (None, None)
    if rekey:
        v.rekey()
    if flushfile:
        v.nofilename().nourl()
    if sanitize:
        self.attributes = a  # restore attributes
        v.attributes = {k:v for (k,v) in self.attributes.items()}  # shallow copy
        v.sanitize()  # remove private attributes
    return v
def colorspace(self, colorspace=None)
-
Return or set the colorspace as ['rgb', 'bgr', 'lum', 'float']. This will not change pixels, only the colorspace interpretation of pixels.
Expand source code Browse git
def colorspace(self, colorspace=None):
    """Return or set the colorspace as ['rgb', 'bgr', 'lum', 'float']. This will not change pixels, only the colorspace interpretation of pixels."""
    if colorspace is None:
        return self._colorspace
    elif self.isloaded():
        assert str(colorspace).lower() in ['rgb', 'bgr', 'lum', 'float']
        if self.array().dtype == np.float32:
            assert str(colorspace).lower() in ['float']
        elif self.array().dtype == np.uint8:
            assert str(colorspace).lower() in ['rgb', 'bgr', 'lum']
            if str(colorspace).lower() in ['lum']:
                assert self.channels() == 1, "Luminance colorspace must be one channel uint8"
            elif str(colorspace).lower() in ['rgb', 'bgr']:
                assert self.channels() == 3, "RGB or BGR colorspace must be three channel uint8"
        else:
            raise ValueError('Invalid array() type "%s" - only np.float32 or np.uint8 allowed' % str(self.array().dtype))
    self._colorspace = str(colorspace).lower()
    return self
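A minimal usage sketch (the buffer shape is an illustrative assumption):

```python
import numpy as np
import vipy.video

buf = np.zeros((8, 64, 64, 3), dtype=np.uint8)     # NxHxWxC uint8
v = vipy.video.Video(array=buf, colorspace='rgb')  # interpret the three channels as RGB
v.colorspace('bgr')                                # reinterpret the channel order; pixels are unchanged
```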
def commandline(self)
-
Return the equivalent ffmpeg command line string that will be used to transcode the video.
This is useful for introspecting the complex filter chain that will be used to process the video. You can try to run this command line yourself for debugging purposes, by replacing 'dummyfile' with an appropriately named output file.
Expand source code Browse git
def commandline(self):
    """Return the equivalent ffmpeg command line string that will be used to transcode the video.

    This is useful for introspecting the complex filter chain that will be used to process the video. You can try to run this command line yourself for debugging purposes, by replacing 'dummyfile' with an appropriately named output file.
    """
    return self._ffmpeg_commandline()
def crop(self, bbi, zeropad=True)
-
Spatially crop the video using the supplied vipy.geometry.BoundingBox, can only be applied prior to load().
Note: Crop is performed in place overwriting pixels of self.array(). Clone() before crop() if array() must be preserved.
Expand source code Browse git
def crop(self, bbi, zeropad=True):
    """Spatially crop the video using the supplied vipy.geometry.BoundingBox, can only be applied prior to load().

    .. note:: Crop is performed in place, overwriting pixels of self.array(). Clone() before crop() if array() must be preserved.
    """
    assert isinstance(bbi, vipy.geometry.BoundingBox), "Invalid input"
    bbc = bbi.clone().imclipshape(self.width(), self.height()).int()  # clipped box to image rectangle
    bb = bbi.int() if zeropad else bbc  # use clipped box if not zeropad
    if bb.isdegenerate():
        return None
    elif not self.isloaded():
        if zeropad and bb != bbc:
            # Crop outside the image rectangle will segfault ffmpeg, pad video first (if zeropad=False, then rangecheck will not occur!)
            self.zeropad(int(np.ceil(bb.width()-bbc.width())), int(np.ceil(bb.height()-bbc.height())))  # cannot be called in derived classes
            bb = bb.offset(int(np.ceil(bb.width()-bbc.width())), int(np.ceil(bb.height()-bbc.height())))  # shift bounding box by padding (integer coordinates)
        self._ffmpeg = self._ffmpeg.filter('crop', '%d' % bb.width(), '%d' % bb.height(), '%d' % bb.xmin(), '%d' % bb.ymin(), keep_aspect=0)  # keep_aspect=False (disable exact=True, this is not present in older ffmpeg)
    else:
        self.array(bbc.crop(self.array()), copy=False)  # crop first, in-place, valid pixels only
        if zeropad and bb != bbc:
            ((dyb, dya), (dxb, dxa)) = ((max(0, int(abs(np.ceil(bb.ymin() - bbc.ymin())))), max(0, int(abs(np.ceil(bb.ymax() - bbc.ymax()))))),
                                        (max(0, int(abs(np.ceil(bb.xmin() - bbc.xmin())))), max(0, int(abs(np.ceil(bb.xmax() - bbc.xmax()))))))
            self._array = np.pad(self.load().array(), ((0,0), (dyb, dya), (dxb, dxa), (0, 0)), mode='constant')
    self.shape(shape=(bb.height(), bb.width()))  # manually set shape
    return self
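A minimal usage sketch, assuming a small random video:

```python
import vipy.geometry
import vipy.video

v = vipy.video.RandomVideo(128, 128, 8)
bb = vipy.geometry.BoundingBox(xmin=16, ymin=16, width=64, height=64)
v.crop(bb, zeropad=True)  # adds an ffmpeg crop filter if not loaded, else crops the numpy buffer in place
```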
def cropeven(self)
-
Crop the video to the largest even (width,height) less than or equal to current (width,height). This is useful for some codecs or filters which require even shape.
Expand source code Browse git
def cropeven(self):
    """Crop the video to the largest even (width,height) less than or equal to current (width,height). This is useful for some codecs or filters which require even shape."""
    return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(vipy.math.even(self.width())), height=int(vipy.math.even(self.height()))))
def delattribute(self, k)
-
Expand source code Browse git
def delattribute(self, k):
    if k in self.attributes:
        self.attributes.pop(k)
    return self
def dict(self)
-
Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding.
Expand source code Browse git
def dict(self):
    """Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding."""
    return self.json(encode=False)
def download(self, ignoreErrors=False, timeout=10, verbose=True, max_filesize='350m')
-
Download URL to filename provided by constructor, or to temp filename.
Args
ignoreErrors
- [bool] If true, show a warning and return the video object, otherwise throw an exception
timeout
- [int] An integer timeout in seconds for the download to connect
verbose
- [bool] If true, show more verbose console output
max_filesize
- [str] A string of the form 'NNNg' or 'NNNm' for youtube downloads to limit the maximum download size, e.g. '350m' for 350MB or '12g' for 12GB.
Returns
This video object with the video downloaded to the filename()
Expand source code Browse git
def download(self, ignoreErrors=False, timeout=10, verbose=True, max_filesize='350m'):
    """Download URL to filename provided by constructor, or to temp filename.

    Args:
        ignoreErrors: [bool] If true, show a warning and return the video object, otherwise throw an exception
        timeout: [int] An integer timeout in seconds for the download to connect
        verbose: [bool] If true, show more verbose console output
        max_filesize: [str] A string of the form 'NNNg' or 'NNNm' for youtube downloads to limit the maximum download size, e.g. '350m' for 350MB or '12g' for 12GB.

    Returns:
        This video object with the video downloaded to the filename()
    """
    if self._url is None and self._filename is not None:
        return self
    if self._url is None:
        raise ValueError('[vipy.video.download]: No URL to download')
    elif not isurl(str(self._url)):
        raise ValueError('[vipy.video.download]: Invalid URL "%s" ' % self._url)

    try:
        url_scheme = urllib.parse.urlparse(self._url)[0]
        if isyoutubeurl(self._url):
            f = self._filename if filefull(self._filename) is None else filefull(self._filename)
            vipy.videosearch.download(self._url, f, writeurlfile=False, skip=ignoreErrors, verbose=verbose, max_filesize=max_filesize)
            for ext in ['mkv', 'mp4', 'webm']:
                f = '%s.%s' % (self.filename(), ext)
                if os.path.exists(f):
                    self.filename(f)  # change the filename to match the youtube extension
                    break
            if not self.hasfilename():
                raise ValueError('Downloaded file not found "%s.*"' % self.filename())
        elif url_scheme in ['http', 'https'] and (isvideourl(self._url) or iswebp(self._url)):
            vipy.downloader.download(self._url, self._filename, verbose=verbose, timeout=timeout, sha1=None, username=None, password=None)
        elif url_scheme == 'file':
            shutil.copyfile(self._url, self._filename)
        elif url_scheme == 's3':
            if self.filename() is None:
                self.filename(totempdir(self._url))
                if vipy.globals.cache() is not None:
                    self.filename(os.path.join(remkdir(vipy.globals.cache()), filetail(self._url)))
            vipy.downloader.s3(self.url(), self.filename(), verbose=verbose)
        elif url_scheme == 'scp':
            if self.filename() is None:
                self.filename(templike(self._url))
                if vipy.globals.cache() is not None:
                    self.filename(os.path.join(remkdir(vipy.globals.cache()), filetail(self._url)))
            vipy.downloader.scp(self._url, self.filename(), verbose=verbose)
        elif not isvideourl(self._url) and vipy.videosearch.is_downloadable_url(self._url):
            vipy.videosearch.download(self._url, filefull(self._filename), writeurlfile=False, skip=ignoreErrors, verbose=verbose, max_filesize=max_filesize)
            for ext in ['mkv', 'mp4', 'webm']:
                f = '%s.%s' % (self.filename(), ext)
                if os.path.exists(f):
                    self.filename(f)
                    break
            if not self.hasfilename():
                raise ValueError('Downloaded file not found "%s.*"' % self.filename())
        elif url_scheme == 'rtsp':
            # https://ffmpeg.org/ffmpeg-protocols.html#rtsp
            pass
        else:
            raise NotImplementedError('Invalid URL scheme "%s" for URL "%s"' % (url_scheme, self._url))
    except (httplib.BadStatusLine, urllib.error.URLError, urllib.error.HTTPError):
        if ignoreErrors:
            warnings.warn('[vipy.video][WARNING]: download failed - Ignoring Video')
            self._array = None
        else:
            raise
    except IOError:
        if ignoreErrors:
            warnings.warn('[vipy.video][WARNING]: IO error - Invalid video file, url or invalid write permissions "%s" - Ignoring video' % self.filename())
            self._array = None
        else:
            raise
    except KeyboardInterrupt:
        raise
    except Exception:
        if ignoreErrors:
            warnings.warn('[vipy.video][WARNING]: load error for video "%s"' % self.filename())
        else:
            raise
    return self
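A minimal usage sketch; the URL and output filename below are hypothetical:

```python
import vipy.video

v = vipy.video.Video(url='https://example.com/video.mp4', filename='/tmp/video.mp4')  # hypothetical URL
v.download(ignoreErrors=True)  # warn instead of raise on failure
if v.isdownloaded():
    v.load()
```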
def downloadif(self, ignoreErrors=False, timeout=10, verbose=True, max_filesize='350m')
-
Download URL to filename if the filename has not already been downloaded
Expand source code Browse git
def downloadif(self, ignoreErrors=False, timeout=10, verbose=True, max_filesize='350m'):
    """Download URL to filename if the filename has not already been downloaded"""
    return self.download(ignoreErrors=ignoreErrors, timeout=timeout, verbose=verbose, max_filesize=max_filesize) if self.hasurl() and not self.isdownloaded() else self
def duration(self, frames=None, seconds=None, minutes=None)
-
Return a video clipped with frame indexes between (0, frames), or (0, seconds*self.framerate()), or (0, minutes*60*self.framerate()). Return the duration in seconds if no arguments are provided.
Expand source code Browse git
def duration(self, frames=None, seconds=None, minutes=None):
    """Return a video clipped with frame indexes between (0, frames), or (0, seconds*self.framerate()), or (0, minutes*60*self.framerate()). Return the duration in seconds if no arguments are provided."""
    if frames is None and seconds is None and minutes is None:
        return self.duration_in_seconds_of_videofile() if not self.isloaded() else (len(self) / self.framerate())
    assert frames is not None or seconds is not None or minutes is not None
    frames = frames if frames is not None else ((int(seconds*self.framerate()) if seconds is not None else 0) + (int(minutes*60*self.framerate()) if minutes is not None else 0))
    return self.clip(0, frames)
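A minimal usage sketch, assuming a small random video:

```python
import vipy.video

v = vipy.video.RandomVideo(64, 64, 60).load()
d = v.duration()                      # duration in seconds at v.framerate()
v2 = v.clone().duration(seconds=1.0)  # clip to the first second, equivalent to clip(0, int(1.0*v.framerate()))
```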
def duration_in_frames(self)
-
Return the duration of the video filter chain in frames, equal to round(self.duration()*self.framerate()). Requires a probe() of the video to get duration
Expand source code Browse git
def duration_in_frames(self):
    """Return the duration of the video filter chain in frames, equal to round(self.duration()*self.framerate()). Requires a probe() of the video to get duration"""
    return int(round(self.duration()*self.framerate()))
def duration_in_frames_of_videofile(self)
-
Return video duration of the source video file (NOT the filter chain) in frames, requires ffprobe.
Notes: This is the duration of the source video and NOT the duration of the filter chain. If you load(), this may be different duration depending on clip() or framerate() directives.
Expand source code Browse git
def duration_in_frames_of_videofile(self):
    """Return video duration of the source video file (NOT the filter chain) in frames, requires ffprobe.

    .. notes:: This is the duration of the source video and NOT the duration of the filter chain. If you load(), this may be a different duration depending on clip() or framerate() directives.
    """
    return int(np.floor(self.duration_in_seconds_of_videofile()*self.framerate_of_videofile()))
def duration_in_seconds_of_videofile(self)
-
Return video duration of the source filename (NOT the filter chain) in seconds, requires ffprobe. Fetch once and cache.
Notes: This is the duration of the source video and NOT the duration of the filter chain. If you load(), this may be different duration depending on clip() or framerate() directives.
Expand source code Browse git
def duration_in_seconds_of_videofile(self):
    """Return video duration of the source filename (NOT the filter chain) in seconds, requires ffprobe. Fetch once and cache.

    .. notes:: This is the duration of the source video and NOT the duration of the filter chain. If you load(), this may be a different duration depending on clip() or framerate() directives.
    """
    filehash = hashlib.md5(str(self.downloadif().filename()).encode()).hexdigest()
    if self.hasattribute('__duration_in_seconds_of_videofile') and self.attributes['__duration_in_seconds_of_videofile']['filehash'] == filehash:
        return self.attributes['__duration_in_seconds_of_videofile']['duration']
    else:
        d = float(self.probe()['format']['duration'])
        self.attributes['__duration_in_seconds_of_videofile'] = {'duration':d, 'filehash':filehash}  # for next time, private attribute
        return d
def fetch(self, ignoreErrors=False)
-
Download only if the video file is not already present (hasfilename() is False)
Expand source code Browse git
def fetch(self, ignoreErrors=False):
    """Download only if the video file is not already present (hasfilename() is False)"""
    return self.download(ignoreErrors=ignoreErrors) if not self.hasfilename() else self
def ffplay(self)
-
Play the video file using ffplay
Expand source code Browse git
def ffplay(self):
    """Play the video file using ffplay"""
    assert self.hasfilename() or (self.hasurl() and self.download().hasfilename())  # triggers download if needed
    cmd = 'ffplay "%s"' % self.filename()
    print('[vipy.video.play]: Executing "%s"' % cmd)
    os.system(cmd)
    return self
def filename(self, newfile=None, copy=False, symlink=False)
-
Update video Filename with optional copy or symlink from existing file (self.filename()) to new file
Expand source code Browse git
def filename(self, newfile=None, copy=False, symlink=False):
    """Update video filename with optional copy or symlink from existing file (self.filename()) to new file"""
    if newfile is None:
        return self._filename
    if not isurl(newfile):
        # Copy or symlink from the old filename to the new filename (if requested)
        newfile = os.path.normpath(os.path.expanduser(newfile))
        if copy:
            assert self.hasfilename(), "File not found for copy"
            remkdir(filepath(newfile))
            shutil.copyfile(self._filename, newfile)
        elif symlink:
            assert self.hasfilename(), "File not found for symlink"
            remkdir(filepath(newfile))
            if os.path.islink(newfile) and os.path.abspath(os.readlink(newfile)) == os.path.normpath(os.path.abspath(os.path.expanduser(self.filename()))):
                pass  # already points to the same place, nothing to do
            else:
                os.symlink(self._filename, newfile)
    # Update ffmpeg filter chain with new input node filename (this file may not exist yet)
    self._update_ffmpeg('filename', newfile)
    self._filename = newfile
    return self
def filesize(self)
-
Return the size in bytes of the filename(), None if the filename() is invalid
Expand source code Browse git
def filesize(self):
    """Return the size in bytes of the filename(), None if the filename() is invalid"""
    return os.path.getsize(self.filename()) if self.hasfilename() else None
def fliplr(self)
-
Mirror the video left/right by flipping horizontally
Expand source code Browse git
def fliplr(self):
    """Mirror the video left/right by flipping horizontally"""
    if not self.isloaded():
        self._ffmpeg = self._ffmpeg.filter('hflip')
    else:
        self.array(np.stack([np.fliplr(f) for f in self._array]), copy=False)
    return self
def flipud(self)
-
Mirror the video up/down by flipping vertically, can only be applied prior to load()
Expand source code Browse git
def flipud(self):
    """Mirror the video up/down by flipping vertically, can only be applied prior to load()"""
    assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
    self._ffmpeg = self._ffmpeg.filter('vflip')
    return self
def float(self)
-
Expand source code Browse git
def float(self):
    self.load()
    self._array = self._array.astype(np.float32) if self._array is not None else self._array
    return self
def flush(self)
-
Alias for clone(flush=True), returns self not clone
Expand source code Browse git
def flush(self):
    """Alias for clone(flush=True), returns self not a clone"""
    self._array = None  # flushes buffer on object and clone
    self.delattribute("__stream_buffer")  # remove if present
    #self._previewhash = None
    self._shape = None
    return self
def flush_and_return(self, retval)
-
Flush the video and return the parameter supplied, useful for long fluent chains
Expand source code Browse git
def flush_and_return(self, retval):
    """Flush the video and return the parameter supplied, useful for long fluent chains"""
    self.flush()
    return retval
def frame(self, k=0, img=None, t=None)
-
Return the kth frame as a vipy.image.Image object
Expand source code Browse git
def frame(self, k=0, img=None, t=None):
    """Return the kth frame as a `vipy.image.Image` object"""
    k = int(self.framerate()*t) if t is not None else k
    assert isinstance(k, int) and k >= 0, "Frame index must be non-negative integer"
    return Image(array=img if img is not None else (self._array[k] if self.isloaded() else self.preview(k).array()), colorspace=self.colorspace())
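A minimal usage sketch, assuming a small random video:

```python
import vipy.video

v = vipy.video.RandomVideo(64, 64, 32)
im = v.frame(10)      # the frame at index k=10 as a vipy.image.Image
im2 = v.frame(t=0.5)  # the frame at t=0.5 seconds, using k = int(framerate()*t)
```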
def frame_meta(self, k=None)
-
Return the frame metadata of the underlying video file using ffprobe for all frames as a list of dicts, each list element corresponding to a frame. This is useful for extracting frame types (e.g. i-frames).
Args
k [int]: Return only the frame metadata for frame index k (relative to framerate of source videofile, not filter chain)
Returns
a list of metadata dicts (one per frame) or a single dict for the requested frame.
Notes
- This will return a large amount of metadata for the entire source video (not the FFMPEG filter chain), use with caution.
- To get frame metadata for a filter chain use vipy.video.Video.savetemp().frame_meta(), which will save the video to a temporary file prior to extracting frame metadata
Expand source code Browse git
def frame_meta(self, k=None):
    """Return the frame metadata of the underlying video file using ffprobe for all frames as a list of dicts, each list element corresponding to a frame. This is useful for extracting frame types (e.g. i-frames).

    Args:
        k [int]: Return only the frame metadata for frame index k (relative to framerate of source videofile, not filter chain)

    Returns:
        a list of metadata dicts (one per frame) or a single dict for the requested frame.

    .. notes::
        - This will return a large amount of metadata for the entire source video (not the FFMPEG filter chain), use with caution.
        - To get frame metadata for a filter chain use vipy.video.Video.savetemp().frame_meta(), which will save the video to a temporary file prior to extracting frame metadata
    """
    d = self.probe(show_frames=None).get('frames')
    return d if k is None else d[k]
def framelist(self)
-
Expand source code Browse git
def framelist(self):
    return list(self.frames())
def framerate(self, fps=None)
-
Change the input framerate for the video and update frame indexes for all annotations
Args
fps
- [Float] frames per second to process the underlying video
Returns
If fps is None, return the current framerate, otherwise set the framerate to fps
Expand source code Browse git
def framerate(self, fps=None):
    """Change the input framerate for the video and update frame indexes for all annotations

    Args:
        fps: [float] frames per second to process the underlying video

    Returns:
        If fps is None, return the current framerate, otherwise set the framerate to fps
    """
    if fps is None:
        return self._framerate
    elif float(fps) == self._framerate:
        return self
    else:
        assert not self.isloaded(), "Filters can only be applied prior to load()"
        if 'fps=' in self._ffmpeg_commandline():
            self._update_ffmpeg('fps', float(fps))  # replace fps filter, do not add to it
        else:
            self._ffmpeg = self._ffmpeg.filter('fps', fps=float(fps), round='up')  # create fps filter first time
        # if '-ss' in self._ffmpeg_commandline():  # No change is needed here. The seek is in seconds and is independent of the framerate
        # if 'trim' in self._ffmpeg_commandline():  # No change is needed here. The trim is in units of seconds which is independent of the framerate
        self._framerate = float(fps)
        return self
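A minimal usage sketch (the filename is hypothetical); framerate() must be called prior to load():

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4')  # hypothetical file
fps = v.framerate()  # current framerate of the filter chain
v.framerate(10)      # insert (or update) an fps filter in the ffmpeg filter chain
```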
def framerate_of_videofile(self)
-
Return video framerate in frames per second of the source video file (NOT the filter chain), requires ffprobe.
Expand source code Browse git
def framerate_of_videofile(self):
    """Return video framerate in frames per second of the source video file (NOT the filter chain), requires ffprobe."""
    p = self.probe()
    assert 'streams' in p and len(p['streams']) > 0
    fps = p['streams'][0]['avg_frame_rate']
    return float(fps) if '/' not in fps else (float(fps.split('/')[0]) / float(fps.split('/')[1]))  # fps='30/1' or fps='30.0'
def frames(self)
-
Alias for __iter__()
Expand source code Browse git
def frames(self):
    """Alias for __iter__()"""
    return self.__iter__()
def fromarray(self, array)
-
Alias for self.array(…, copy=True), which forces the new array to be a copy
Expand source code Browse git
def fromarray(self, array):
    """Alias for self.array(..., copy=True), which forces the new array to be a copy"""
    return self.array(array, copy=True)
def fromdirectory(self, indir, sortkey=None)
-
Create a video from a directory of frames stored as individual image filenames.
Given a directory with files:
framedir/image_0001.jpg
framedir/image_0002.jpg
vipy.video.Video(frames='/path/to/framedir')
Expand source code Browse git
def fromdirectory(self, indir, sortkey=None):
    """Create a video from a directory of frames stored as individual image filenames.

    Given a directory with files:

        framedir/image_0001.jpg
        framedir/image_0002.jpg

    ```python
    vipy.video.Video(frames='/path/to/framedir')
    ```
    """
    return self.fromframes([vipy.image.Image(filename=f) for f in sorted(vipy.util.imlist(indir), key=sortkey)])
def fromframes(self, framelist, copy=True)
-
Create a video from a list of frames
Expand source code Browse git
def fromframes(self, framelist, copy=True):
    """Create a video from a list of frames"""
    assert all([isinstance(im, vipy.image.Image) for im in framelist]), "Invalid input"
    return self.array(np.stack([im.load().array() if im.load().array().ndim == 3 else np.expand_dims(im.load().array(), 2) for im in framelist]), copy=copy).colorspace(framelist[0].colorspace())
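A minimal round-trip sketch, assuming a small random video and that iterating a video yields vipy.image.Image frames (see frames() above):

```python
import vipy.video

v = vipy.video.RandomVideo(64, 64, 8)
ims = [im for im in v]          # frames as vipy.image.Image objects
v2 = v.clone().fromframes(ims)  # pixels are copied into the clone (copy=True by default)
```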
def gain(self, g)
-
Pixelwise multiplicative gain, such that each pixel p_{ij} = g * p_{ij}
Expand source code Browse git
def gain(self, g):
    """Pixelwise multiplicative gain, such that each pixel p_{ij} = g * p_{ij}"""
    return self.normalize(0, 1, scale=g)
def getattribute(self, k)
-
Return the key k in the attributes dictionary (self.attributes) if present, else None
Expand source code Browse git
def getattribute(self, k):
    """Return the key k in the attributes dictionary (self.attributes) if present, else None"""
    return self.attributes[k] if k in self.attributes else None
def gif(self, outfile, pause=3, smallest=False, smaller=False, framerate=None)
-
Save a video to an animated GIF file, with pause=N seconds between loops.
Args
pause
- Integer seconds to pause between loops of the animation
smallest
- If true, create the smallest possible file but takes much longer to run
smaller
- If true, create a smaller file, which takes a little longer to run
framerate [float]: The output framerate of the GIF file. The default is the framerate of the video.
Returns
The filename of the animated GIF of this video
Warning: This will be very large for big videos, consider using Video.webp() instead.
Expand source code Browse git
def gif(self, outfile, pause=3, smallest=False, smaller=False, framerate=None):
    """Save a video to an animated GIF file, with pause=N seconds between loops.

    Args:
        pause: Integer seconds to pause between loops of the animation
        smallest: If true, create the smallest possible file but takes much longer to run
        smaller: If true, create a smaller file, which takes a little longer to run
        framerate: [float] The output framerate of the GIF file. The default is the framerate of the video.

    Returns:
        The filename of the animated GIF of this video

    .. warning:: This will be very large for big videos, consider using `vipy.video.Video.webp` instead.
    """
    assert isgif(outfile)
    return self.webp(outfile, pause, strict=False, smallest=smallest, smaller=True, framerate=framerate)
def hasattribute(self, k)
-
Does the attributes dictionary (self.attributes) contain the provided key
Expand source code Browse git
def hasattribute(self, k):
    """Does the attributes dictionary (self.attributes) contain the provided key"""
    return isinstance(self.attributes, dict) and k in self.attributes
def hasfilename(self)
-
Does the filename returned from Video.filename() exist?
Expand source code Browse git
def hasfilename(self):
    """Does the filename returned from `vipy.video.Video.filename` exist?"""
    return self._filename is not None and (os.path.exists(self._filename) or isRTSPurl(self._filename) or isRTMPurl(self._filename))
def hasurl(self)
-
Is the url returned from Video.url() a well formed url?
Expand source code Browse git
def hasurl(self):
    """Is the url returned from `vipy.video.Video.url` a well formed url?"""
    return self._url is not None and isurl(self._url)
def height(self)
-
Height (rows) in pixels of the video for the current filter chain
Expand source code Browse git
def height(self):
    """Height (rows) in pixels of the video for the current filter chain"""
    return self.shape()[0]
def iframes(self)
-
Return a list of i-frame indexes (e.g. intra-frame, a video frame that is independent from other frames for decoding) in this video file.
Note
- To return the i-frame indexes for the current filter chain use self.saveas().iframes() to save to a temporary file prior to i-frame index extraction.
- To extract the i-frame itself, use [self.frame(k) for k in self.iframes()]
Expand source code Browse git
def iframes(self):
    """Return a list of i-frame indexes (e.g. intra-frame, a video frame that is independent from other frames for decoding) in this video file.

    .. note::
        - To return the i-frame indexes for the current filter chain use self.saveas().iframes() to save to a temporary file prior to i-frame index extraction.
        - To extract the i-frame itself, use [self.frame(k) for k in self.iframes()]
    """
    return [k for (k,d) in enumerate(self.metaframe()) if d['pict_type'] == 'I']
def iscolor(self)
-
Is the video a three channel color video as returned from Video.channels()?
Expand source code Browse git
def iscolor(self):
    """Is the video a three channel color video as returned from `vipy.video.Video.channels`?"""
    return self.channels() == 3
def isdownloaded(self)
-
Does the filename returned from Video.filename() exist, meaning that the url has been downloaded to a local file?
Expand source code Browse git
def isdownloaded(self):
    """Does the filename returned from `vipy.video.Video.filename` exist, meaning that the url has been downloaded to a local file?"""
    return self._filename is not None and os.path.exists(self._filename)
def isgrayscale(self)
-
Is the video a single channel as returned from Video.channels()?
Expand source code Browse git
def isgrayscale(self):
    """Is the video a single channel as returned from `vipy.video.Video.channels`?"""
    return self.channels() == 1
def islive(self)
-
Expand source code Browse git
def islive(self):
    return self.hasurl() and (isRTSPurl(self._url) or isRTMPurl(self._url))
def isloadable(self, flush=True)
-
Return True if the video can be loaded successfully.
This is useful for filtering bad videos or filtering videos that cannot be loaded using your current FFMPEG version.
Args
flush
- [bool] If true, flush the video after it loads. This will clear the video pixel buffer
Returns
True if load() can be called without FFMPEG exception.
If flush=False, then self will contain the loaded video, which is helpful to avoid calling load() twice in some conditions.
Warning: This requires loading and flushing the video. This is an expensive operation when performed on many videos and may result in out of memory conditions with long videos. Use with caution! Try Video.canload() to test if a single frame can be loaded as a less expensive alternative.
Expand source code Browse git
def isloadable(self, flush=True):
    """Return True if the video can be loaded successfully.

    This is useful for filtering bad videos or filtering videos that cannot be loaded using your current FFMPEG version.

    Args:
        flush: [bool] If true, flush the video after it loads. This will clear the video pixel buffer

    Returns:
        True if load() can be called without FFMPEG exception.
        If flush=False, then self will contain the loaded video, which is helpful to avoid calling load() twice in some conditions

    .. warning:: This requires loading and flushing the video. This is an expensive operation when performed on many videos and may result in out of memory conditions with long videos. Use with caution! Try `vipy.video.Video.canload` to test if a single frame can be loaded as a less expensive alternative.
    """
    if not self.isloaded():
        try:
            self.load()  # try to load the whole thing
            if flush:
                self.flush()
            return True
        except:
            return False
    else:
        return True
def isloaded(self)
-
Return True if the video has been loaded
Expand source code Browse git
def isloaded(self):
    """Return True if the video has been loaded"""
    return self._array is not None
def issquare(self)
-
Return true if the video has square dimensions (height == width), else false
Expand source code Browse git
def issquare(self):
    """Return true if the video has square dimensions (height == width), else false"""
    s = self.shape()
    return s[0] == s[1]
def isstreaming(self)
-
Expand source code Browse git
def isstreaming(self):
    return isRTSPurl(self._filename) or isRTMPurl(self._filename)
def json(self, encode=True)
-
Return a json representation of the video.
Args
encode
- If true, return a JSON encoded string using json.dumps
Returns
A JSON encoded string if encode=True, else returns a dictionary object
Note: If the video is loaded, then the JSON will not include the pixels. Try using Video.store() to serialize videos, or call Video.flush() first.
Expand source code Browse git
def json(self, encode=True):
    """Return a json representation of the video.

    Args:
        encode: If true, return a JSON encoded string using json.dumps

    Returns:
        A JSON encoded string if encode=True, else returns a dictionary object

    .. note:: If the video is loaded, then the JSON will not include the pixels. Try using `vipy.video.Video.store` to serialize videos, or call `vipy.video.Video.flush` first.
    """
    if self.isloaded():
        warnings.warn("JSON serialization of loaded video is inefficient. Try store()/restore()/unstore() instead to serialize videos as standalone objects efficiently, or flush() any loaded videos prior to serialization to quiet this warning.")
    d = {'filename':self._filename,
         'url':self._url,
         'framerate':self._framerate,
         'array':None,
         'colorspace':self._colorspace,
         'attributes':self.attributes,
         'startframe':self._startframe,
         'endframe':self._endframe,
         'endsec':self._endsec,
         'startsec':self._startsec,
         'ffmpeg':self._ffmpeg_commandline()}
    return json.dumps(d) if encode else d
def load(self, verbose=False, ignoreErrors=False, shape=None)
-
Load a video using ffmpeg, applying the requested filter chain.
Args
verbose
- [bool] if True, then ffmpeg console output will be displayed.
ignoreErrors
- [bool] if True, then all load errors are warned and skipped. Be sure to call isloaded() to confirm loading was successful.
shape
- [tuple (height, width, channels)] If provided, use this shape for reading and reshaping the byte stream from ffmpeg. This is useful for efficient loading in some scenarios. Knowing the final output shape can speed up loads by avoiding a preview() of the filter chain to get the frame size
Returns
this video object, with the pixels loaded in self.array()
Warning: Loading long videos can result in out of memory conditions. Try to call clip() first to extract a video segment to load().
def load(self, verbose=False, ignoreErrors=False, shape=None):
    """Load a video using ffmpeg, applying the requested filter chain.

    Args:
        verbose: [bool] if True, then ffmpeg console output will be displayed.
        ignoreErrors: [bool] if True, then all load errors are warned and skipped. Be sure to call isloaded() to confirm loading was successful.
        shape: [tuple (height, width, channels)] If provided, use this shape for reading and reshaping the byte stream from ffmpeg. This is useful for efficient loading in some scenarios. Knowing the final output shape can speed up loads by avoiding a preview() of the filter chain to get the frame size.

    Returns:
        this video object, with the pixels loaded in self.array()

    .. warning:: Loading long videos can result in out of memory conditions. Try to call clip() first to extract a video segment to load().
    """
    if self.isloaded():
        return self
    elif not self.hasfilename() and self.hasurl():
        self.download(ignoreErrors=ignoreErrors)
    elif not self.hasfilename() and not ignoreErrors:
        raise ValueError('Invalid input - load() requires a valid URL, filename or array')
    if not self.hasfilename() and ignoreErrors:
        print('[vipy.video.load]: Video file "%s" not found - Ignoring' % self.filename())
        return self
    if iswebp(self.filename()) or isgif(self.filename()):
        frames = []
        pil = PIL.Image.open(self.filename())
        self._framerate = (1000.0 / pil.info['duration']) if 'duration' in pil.info else self._framerate
        for k in range(pil.n_frames):
            pil.seek(k)
            frames.append(np.array(pil.convert('RGB')))
        return self.array(np.stack(frames)).colorspace('RGB')

    # Load the video with ffmpeg
    # [EXCEPTION]: older ffmpeg versions may segfault on complex crop filter chains
    #   - On some versions of ffmpeg setting -cpuflags=0 fixes it, but the right solution is to rebuild from the head (30APR20)
    if verbose:
        print('[vipy.video.load]: Loading "%s"' % self.filename())
    try:
        f_prepipe = copy.deepcopy(self._ffmpeg)
        f = self._ffmpeg.output('pipe:', format='rawvideo', pix_fmt='rgb24')\
                        .global_args('-cpuflags', '0', '-loglevel', 'debug' if vipy.globals.isdebug() else 'quiet')
        (out, err) = f.run(capture_stdout=True, capture_stderr=True)
    except Exception as e:
        if not ignoreErrors:
            raise ValueError('[vipy.video.load]: Load failed with error "%s"\n\n - Video: "%s"\n - FFMPEG command: \'sh> %s\'\n - This error usually means that the video is corrupted or that you need to upgrade your FFMPEG distribution to the latest stable version.\n - Try running the output of the ffmpeg command for debugging.' % (str(e), str(self), str(self._ffmpeg_commandline(f_prepipe.output('preview.mp4')))))
        else:
            return self  # failed, return immediately, useful for calling canload()

    # Video shape:
    #   - Due to complex filter chains, we may not know the final video size without executing it
    #   - However, this introduces extra cost by calling preview() on each filter chain
    #   - If we know what the shape will be (e.g. we made the video square with a known size), then use it here directly
    (height, width, channels) = (self.height(), self.width(), self.channels()) if shape is None else shape

    # [EXCEPTION]: older ffmpeg versions may be off by one on the size returned from self.preview() which uses an image decoder vs. f.run() which uses a video decoder
    #   - Try to correct this manually by searching for an off-by-one-pixel decoding that works.  The right way is to upgrade your FFMPEG version to the FFMPEG head (11JUN20)
    #   - We cannot tell which is the one that the end-user wanted, so we leave it up to the calling function to check dimensions (see self.resize())
    if (len(out) % (height*width*channels)) != 0:
        #warnings.warn('Your FFMPEG version is triggering a known bug that is being worked around in an inefficient manner. Consider upgrading your FFMPEG distribution.')
        if (len(out) % ((height-1)*(width-1)*channels) == 0):
            (newwidth, newheight) = (width-1, height-1)
        elif (len(out) % ((height-1)*(width)*channels) == 0):
            (newwidth, newheight) = (width, height-1)
        elif (len(out) % ((height-1)*(width+1)*channels) == 0):
            (newwidth, newheight) = (width+1, height-1)
        elif (len(out) % ((height)*(width-1)*channels) == 0):
            (newwidth, newheight) = (width-1, height)
        elif (len(out) % ((height)*(width+1)*channels) == 0):
            (newwidth, newheight) = (width+1, height)
        elif (len(out) % ((height+1)*(width-1)*channels) == 0):
            (newwidth, newheight) = (width-1, height+1)
        elif (len(out) % ((height+1)*(width)*channels) == 0):
            (newwidth, newheight) = (width, height+1)
        elif (len(out) % ((height+1)*(width+1)*channels) == 0):
            (newwidth, newheight) = (width+1, height+1)
        else:
            (newwidth, newheight) = (width, height)

        is_loadable = (len(out) % (newheight*newwidth*channels)) == 0
        if not is_loadable:
            im = self.preview()  # get the real shape ...
            (newheight, newwidth, newchannels) = (im.height(), im.width(), im.channels())
        assert is_loadable or ignoreErrors, "Load failed for video '%s', and FFMPEG command line: '%s'" % (str(self), str(self._ffmpeg_commandline(f)))
        self._array = np.frombuffer(out, np.uint8).reshape([-1, newheight, newwidth, channels]) if is_loadable else None  # read-only
        self.colorspace('rgb' if channels == 3 else 'lum')
        self.resize(rows=height, cols=width)  # very expensive framewise resizing so that the loaded video is identical shape to preview
    else:
        self._array = np.frombuffer(out, np.uint8).reshape([-1, height, width, channels])  # read-only
        self.colorspace('rgb' if channels == 3 else 'lum')
    return self
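A minimal usage sketch (not from the source), assuming a local MP4 at a placeholder path: clip a short segment before loading to bound memory, as the warning above recommends.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4')   # hypothetical path
vc = v.clip(0, 128).load()                # decode only the first 128 frames into memory
print(vc.isloaded(), vc.array().shape)    # (N, H, W, C) uint8 numpy array
```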
def map(self, func)
-
Apply a lambda function to the loaded numpy array img; this changes pixels, not shape.
The lambda function must have the following signature:

* newimg = func(img)
* img: HxWxC numpy array for a single frame of video
* newimg: HxWxC modified numpy array for this frame. Change only the pixels, not the shape

The lambda function will be applied to every frame in the video in frame index order.
def map(self, func):
    """Apply a lambda function to the loaded numpy array img; this changes pixels, not shape.

    The lambda function must have the following signature:

    * newimg = func(img)
    * img: HxWxC numpy array for a single frame of video
    * newimg: HxWxC modified numpy array for this frame. Change only the pixels, not the shape

    The lambda function will be applied to every frame in the video in frame index order.
    """
    assert isinstance(func, types.LambdaType), "Input must be lambda function with np.array() input and np.array() output"
    oldimgs = self.load().array()
    self.array(np.apply_along_axis(func, 0, self._array))  # FIXME: in-place operation?  NOTE: np.apply_along_axis applies func to 1-D slices along the frame axis, which matches framewise application only for elementwise functions
    if (any([oldimg.dtype != newimg.dtype for (oldimg, newimg) in zip(oldimgs, self.array())]) or
        any([oldimg.shape != newimg.shape for (oldimg, newimg) in zip(oldimgs, self.array())])):
        self.colorspace('float')  # unknown colorspace after shape or type transformation, set generic
    return self
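A minimal sketch (not from the source) of an elementwise pixel transform, which is safe under the framewise contract above; the path is a placeholder.

```python
import numpy as np
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4')   # hypothetical path
v = v.map(lambda img: np.uint8(255) - img)            # invert intensities; elementwise, so shape and dtype are preserved
```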
def maxdim(self, dim=None)
-
Resize the video so that the maximum of (width,height)=dim, preserving aspect ratio
def maxdim(self, dim=None):
    """Resize the video so that the maximum of (width,height)=dim, preserving aspect ratio"""
    assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
    (H,W) = self.shape()  # yuck, need to get image dimensions before filter
    return max(H,W) if dim is None else (self.resize(cols=dim) if W>H else self.resize(rows=dim))
def maxmatte(self)
-
Return a square video with dimensions (self.maxdim(), self.maxdim()) with zero-padded black bars or mattes above or below the video, forming a letterboxed video.
def maxmatte(self):
    """Return a square video with dimensions (self.maxdim(), self.maxdim()) with zero-padded black bars or mattes above or below the video, forming a letterboxed video."""
    return self.zeropad(max(1, int((max(self.shape()) - self.width())/2)),
                        max(int((max(self.shape()) - self.height())/2), 1)).maxsquare()
def maxsquare(self)
-
Pad the video to be square, preserving the upper left corner of the video
def maxsquare(self):
    """Pad the video to be square, preserving the upper left corner of the video"""
    # This ffmpeg filter can throw the error: "Padded dimensions cannot be smaller than input dimensions." since the preview is off by one.  Add one here to make sure.
    # FIXME: not sure where in some filter chains this off-by-one error is being introduced, but probably does not matter since it does not affect any annotations
    # and since the max square always preserves the scale and the upper left corner of the source video.
    # FIXME: this may trigger an inefficient resizing operation during load()
    if not self.issquare():
        d = max(self.shape())
        self._ffmpeg = self._ffmpeg.filter('pad', d+1, d+1, 0, 0)
        self.shape(shape=(d+1, d+1))
        return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(d), height=int(d)))
    else:
        return self
def metadata(self, k=None)
-
Return a dictionary of metadata about this video.
Args
k [str]: If provided, return just the specified key of the attributes dictionary, otherwise return the attributes dictionary
Returns
The 'attributes' dictionary, or just the value for the provided key k if provided
def metadata(self, k=None):
    """Return a dictionary of metadata about this video.

    Args:
        k [str]: If provided, return just the specified key of the attributes dictionary, otherwise return the attributes dictionary

    Returns:
        The 'attributes' dictionary, or just the value for the provided key k if provided
    """
    return self.attributes if k is None else self.attributes[k]
def metaframe(self, k=None)
-
Alias for
Video.frame_meta()
def metaframe(self, k=None):
    """Alias for `vipy.video.Video.frame_meta`"""
    return self.frame_meta(k)
def mindim(self, dim=None)
-
Resize the video so that the minimum of (width,height)=dim, preserving aspect ratio
def mindim(self, dim=None):
    """Resize the video so that the minimum of (width,height)=dim, preserving aspect ratio"""
    (H,W) = self.shape()  # yuck, need to get image dimensions before filter
    return min(self.shape()) if dim is None else (self if min(H,W) == dim else (self.resize(cols=dim) if W<H else self.resize(rows=dim)))
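A minimal preprocessing sketch (not from the source), assuming a local file path: mindim() is appended to the ffmpeg filter chain before load(), so the decode happens at the reduced resolution.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4')   # hypothetical path
print(v.mindim())        # query the current minimum dimension (triggers a preview)
v = v.mindim(256).load() # resize so min(H,W)=256, then decode at that size
```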
def minsquare(self)
-
Return a square crop of the video, preserving the upper left corner of the video
def minsquare(self):
    """Return a square crop of the video, preserving the upper left corner of the video"""
    d = min(self.shape())
    self.shape(shape=(d, d))
    return self.crop(vipy.geometry.BoundingBox(xmin=0, ymin=0, width=int(d), height=int(d)))
def mutable(self)
-
Return a video object with a writeable mutable frame array. Video must be loaded, triggers copy of underlying numpy array if the buffer is not writeable.
Returns
This object with a mutable frame buffer in self.array() or self.numpy()
def mutable(self):
    """Return a video object with a writeable mutable frame array.  Video must be loaded, triggers copy of underlying numpy array if the buffer is not writeable.

    Returns:
        This object with a mutable frame buffer in self.array() or self.numpy()
    """
    assert self.isloaded()
    self._array = np.copy(self._array) if not self._array.flags['WRITEABLE'] else self._array  # triggers copy
    self._array.setflags(write=True)  # mutable iterators, torch conversion
    return self
def nofilename(self)
-
def nofilename(self):
    self._filename = None
    self._update_ffmpeg('filename', None)
    return self
def normalize(self, mean, std, scale=1, bias=0)
-
Pixelwise whitening, out = ((scale*in) - mean) / std; triggers load(). All computations are float32.
def normalize(self, mean, std, scale=1, bias=0):
    """Pixelwise whitening, out = ((scale*in) - mean) / std; triggers load().  All computations are float32"""
    assert scale >= 0, "Invalid input"
    assert all([s > 0 for s in tolist(std)]), "Invalid input"
    self._array = vipy.math.normalize(self._array, np.array(mean, dtype=np.float32), np.array(std, dtype=np.float32), np.float32(scale))
    if bias != 0:
        self._array = self._array + np.array(bias, dtype=np.float32)
    return self.colorspace('float')
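A minimal sketch (not from the source) mapping uint8 pixels to roughly [-1, 1]; the path and the mean/std values are illustrative assumptions, and the video is loaded first.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4').load()   # hypothetical path
v = v.normalize(mean=127.5, std=127.5)   # out = (in - 127.5) / 127.5, float32 in approximately [-1, 1]
print(v.colorspace())                    # 'float' after whitening
```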
def nourl(self)
-
Remove the
Video.url()
from the video
def nourl(self):
    """Remove the `vipy.video.Video.url` from the video"""
    (self._url, self._urluser, self._urlpassword, self._urlsha1) = (None, None, None, None)
    return self
def numpy(self)
-
Convert the video to a writeable numpy array, triggers a load() and copy() as needed. Returns the numpy array.
def numpy(self):
    """Convert the video to a writeable numpy array, triggers a load() and copy() as needed.  Returns the numpy array."""
    self.load()
    self._array = np.copy(self._array) if not self._array.flags['WRITEABLE'] else self._array  # triggers copy
    self._array.setflags(write=True)  # mutable iterators, torch conversion
    return self._array
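A minimal sketch (not from the source) of in-place frame editing through the writeable array, assuming a short local clip at a placeholder path.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4')   # hypothetical path
a = v.clip(0, 32).numpy()   # (N, H, W, C) writeable uint8 array; triggers load() and copy as needed
a[0, :, :, :] = 0           # blank the first frame; the edit is visible through v.array()
```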
def pad(self, padwidth=0, padheight=0)
-
Alias for zeropad
def pad(self, padwidth=0, padheight=0):
    """Alias for zeropad"""
    return self.zeropad(padwidth=padwidth, padheight=padheight)
def pkl(self, pklfile=None)
-
Save the object to a pickle file and return the object, useful for intermediate saving in long fluent chains
def pkl(self, pklfile=None):
    """Save the object to a pickle file and return the object, useful for intermediate saving in long fluent chains"""
    pklfile = pklfile if pklfile is not None else toextension(self.filename(), '.pkl')
    remkdir(filepath(pklfile))
    vipy.util.save(self, pklfile)
    return self
def pklif(self, b, pklfile=None)
-
Save the object to the provided pickle file only if b=True. Useful for conditional intermediate saving in long fluent chains
def pklif(self, b, pklfile=None):
    """Save the object to the provided pickle file only if b=True.  Useful for conditional intermediate saving in long fluent chains"""
    assert isinstance(b, bool)
    return self.pkl(pklfile) if b else self
def play(self, verbose=False, notebook=False, ffplay=True)
-
Play the saved video filename in self.filename()
If there is no filename, try to download it. If the filter chain is dirty or the pixels are loaded, dump to a temp video file first, then play it. This uses 'ffplay' on the PATH if available, otherwise uses a fallback player by showing a sequence of matplotlib frames. If the output of the ffmpeg filter chain has modified this video, then this will be saved to a temporary video file. To play the original video (independent of the filter chain of this video), use
Video.ffplay()
Args
verbose
- If true, show more verbose output
notebook
- If true, play in a jupyter notebook
ffplay
- If true, use ffplay to display the video (if available)
Returns
The unmodified video object
def play(self, verbose=False, notebook=False, ffplay=True):
    """Play the saved video filename in self.filename()

    If there is no filename, try to download it.  If the filter chain is dirty or the pixels are loaded, dump to a temp video file first, then play it.  This uses 'ffplay' on the PATH if available, otherwise uses a fallback player by showing a sequence of matplotlib frames.  If the output of the ffmpeg filter chain has modified this video, then this will be saved to a temporary video file.  To play the original video (independent of the filter chain of this video), use `vipy.video.Video.ffplay`.

    Args:
        verbose: If true, show more verbose output
        notebook: If true, play in a jupyter notebook
        ffplay: If true, use ffplay to display the video (if available)

    Returns:
        The unmodified video object
    """
    if not self.isdownloaded() and self.hasurl():
        self.download()
    if not self.isloaded() and (iswebp(self.filename()) or isgif(self.filename())):
        self.load()

    if notebook:
        # Save to temporary video; this video is not cleaned up and may accumulate
        try_import("IPython.display", "ipython"); import IPython.display
        if not self.hasfilename() or self.isloaded() or self._isdirty():
            v = self.saveas(tempMP4())
            warnings.warn('Saving video to temporary file "%s" for notebook viewer ... ' % v.filename())
            return IPython.display.Video(v.filename(), embed=True)
        return IPython.display.Video(self.filename(), embed=True)
    elif ffplay and has_ffplay:
        if self.isloaded() or self._isdirty():
            f = tempMP4()
            if verbose:
                warnings.warn('%s - Saving video to temporary file "%s" for ffplay ... ' % ('Video loaded into memory' if self.isloaded() else 'Dirty FFMPEG filter chain', f))
            v = self.saveas(f)
            cmd = 'ffplay "%s"' % v.filename()
            if verbose:
                print('[vipy.video.play]: Executing "%s"' % cmd)
            os.system(cmd)
            if verbose:
                print('[vipy.video.play]: Removing temporary file "%s"' % v.filename())
            os.remove(v.filename())  # cleanup
        elif self.hasfilename() or (self.hasurl() and self.download().hasfilename()):  # triggers download
            self.ffplay()
        else:
            raise ValueError('Invalid video file "%s" - ffplay requires a video filename' % self.filename())
        return self
    else:
        # Fallback player.  This can visualize videos without ffplay, but it cannot guarantee frame rates.
        # Large videos with complex scenes will slow this down and will render at lower frame rates.
        fps = self.framerate()
        assert fps > 0, "Invalid display framerate"
        with Stopwatch() as sw:
            for (k,im) in enumerate(self.load() if self.isloaded() else self.stream()):
                time.sleep(max(0, (1.0/self.framerate())*int(np.ceil((self.framerate()/fps))) - sw.since()))
                im.show(figure='video')
                if vipy.globals._user_hit_escape():
                    break
        vipy.show.close('video')
        return self
def preview(self, framenum=0)
-
Return selected frame of filtered video, return vipy.image.Image object. This is useful for previewing the frame shape of a complex filter chain or the frame contents at a particular location without loading the whole video
def preview(self, framenum=0):
    """Return selected frame of filtered video, return vipy.image.Image object.  This is useful for previewing the frame shape of a complex filter chain or the frame contents at a particular location without loading the whole video"""
    if self.isloaded():
        return self[framenum]
    elif self.hasurl() and not self.hasfilename():
        self.download(verbose=True)
    if not self.hasfilename():
        raise ValueError('Video file not found')
    if iswebp(self.filename()) or isgif(self.filename()):
        return self.load().frame(framenum)

    # Convert frame to mjpeg and pipe to stdout, used to get dimensions of video
    #   - The MJPEG encoder will generally output lower quality than H.264 encoded frames
    #   - This means that frame indexing from preview() will generate slightly different images than streaming raw
    #   - Beware running convnets, as the pixels will be slightly different (~4 grey levels in uint8) ...
    try:
        # FFMPEG frame indexing is inefficient for large framenum.  Need to add "-ss sec.msec" flag before input
        #   - the "ss" option must be provided before the input filename, and is supported by ffmpeg-python as ".input(in_filename, ss=time)"
        #   - Seek to the frame before the desired frame in order to pipe the next (desired) frame
        timestamp_in_seconds = max(0.0, (framenum-1)/float(self.framerate()))
        f_prepipe = self.clone(shallow=True)._update_ffmpeg_seek(offset=timestamp_in_seconds)._ffmpeg.filter('select', 'gte(n,{})'.format(0))
        f = f_prepipe.output('pipe:', vframes=1, format='image2', vcodec='mjpeg')\
                     .global_args('-cpuflags', '0', '-loglevel', 'debug' if vipy.globals.isdebug() else 'error')
        (out, err) = f.run(capture_stdout=True, capture_stderr=True)
    except Exception as e:
        raise ValueError('[vipy.video.load]: Video preview failed with error "%s"\n - Video: "%s"\n - FFMPEG command: \'sh> %s\'\n - Try manually running this ffmpeg command to see errors.  This error usually means that the video is corrupted.' % (str(e), str(self), str(self._ffmpeg_commandline(f_prepipe.output('preview.jpg', vframes=1)))))

    # [EXCEPTION]: UnidentifiedImageError: cannot identify image file, usually means that FFMPEG piped a zero length image
    try:
        return Image(array=np.array(PIL.Image.open(BytesIO(out))))
    except Exception as e:
        print('[vipy.video.Video.preview][ERROR]: %s' % str(e))
        print(' - FFMPEG attempted to extract a single frame from the following video and failed:\n %s' % str(self))
        print(' - This may occur after calling clip() with too short a duration, try increasing the clip to be > 1 sec')
        print(' - This may occur after calling clip() with a startframe or endframe outside the duration of the video')
        print(' - This may occur if requesting a frame number greater than the length of the video.  At this point, we do not know the video length, and cannot fail gracefully')
        print(' - This may occur when the framerate of the video from ffprobe (tbr) does not match that passed to fps filter, resulting in a zero length image preview piped to stdout')
        print(' - This may occur if the filter chain fails for some unknown reason on this video.  Try running this ffmpeg command manually and inspect the FFMPEG console output:\n sh> %s' % str(self._ffmpeg_commandline(f_prepipe.output('preview.jpg', vframes=1))))
        raise
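A minimal sketch (not from the source) using preview() to inspect the output of a filter chain without decoding the whole file; the path is a placeholder.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4').mindim(256)   # hypothetical path; filter chain not yet executed
im = v.preview()                   # decode a single filtered frame
print(im.height(), im.width())     # the shape a full load() would produce
```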
def print(self, prefix='', verbose=True, sleep=None)
-
Print the representation of the video
This is useful for debugging in long fluent chains. Sleep is useful for adding in a delay for distributed processing.
Args
prefix
- prepend a string prefix to the video repr when printing. Useful for logging.
verbose
- Print out the video repr. Set verbose=False to just sleep
sleep
- Integer number of seconds to sleep before returning
Returns
The video object after sleeping
def print(self, prefix='', verbose=True, sleep=None):
    """Print the representation of the video

    This is useful for debugging in long fluent chains.  Sleep is useful for adding in a delay for distributed processing.

    Args:
        prefix: prepend a string prefix to the video __repr__ when printing.  Useful for logging.
        verbose: Print out the video __repr__.  Set verbose=False to just sleep
        sleep: Integer number of seconds to sleep before returning

    Returns:
        The video object after sleeping
    """
    if verbose:
        print(prefix+self.__repr__())
    if sleep is not None:
        assert isinstance(sleep, int) and sleep > 0, "Sleep must be a positive integer number of seconds"
        time.sleep(sleep)
    return self
def printif(self, b, prefix='', verbose=True, sleep=None)
-
Call
Video.print()
if b=True. Useful for fluent chains to print periodically.
def printif(self, b, prefix='', verbose=True, sleep=None):
    """Call `vipy.video.Video.print` if b=True.  Useful for fluent chains to print periodically."""
    assert isinstance(b, bool)
    return self.print(prefix=prefix, verbose=b, sleep=sleep)
def probe(self, **kwargs)
-
Run ffprobe on the filename and return the result as a dictionary
Args
Any keyword arguments supported by python-ffmpeg probe() - these are passed in as-is - for flags, use flag_name=None (e.g., show_frames=None) so that ffmpeg.probe() handles them correctly
def probe(self, **kwargs):
    """Run ffprobe on the filename and return the result as a dictionary

    Args:
        Any keyword arguments supported by python-ffmpeg probe() - these are passed in as-is - for flags, use flag_name=None (e.g., show_frames=None) so that ffmpeg.probe() handles them correctly
    """
    if not has_ffprobe:
        raise ValueError('"ffprobe" executable not found on path, this is optional for vipy.video - Install from http://ffmpeg.org/download.html')
    assert self.downloadif().hasfilename(), "Invalid video file '%s' for ffprobe" % self.filename()
    return ffmpeg.probe(self.filename(), **kwargs)
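A minimal sketch (not from the source) reading common fields out of the probe dictionary, assuming ffprobe is on the PATH; the keys shown follow standard ffprobe JSON output and the path is a placeholder.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4')   # hypothetical path
p = v.probe()
print(p['format']['duration'])          # duration in seconds, as a string
print(p['streams'][0]['codec_name'])    # codec of the first stream
```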
def probeshape(self)
-
Return the (height, width) of underlying video file as determined from ffprobe
Warning: this does not take into account any applied ffmpeg filters. The shape will be the (height, width) of the underlying video file.
def probeshape(self):
    """Return the (height, width) of underlying video file as determined from ffprobe

    .. warning:: this does not take into account any applied ffmpeg filters.  The shape will be the (height, width) of the underlying video file.
    """
    p = self.probe()
    assert len(p['streams']) > 0
    return (p['streams'][0]['height'], p['streams'][0]['width'])
def quicklook(self, n=9, mindim=256, startframe=0, animate=False, dt=30, thumbnail=None)
-
Generate a montage of n uniformly spaced frames. Montage increases rowwise for n uniformly spaced frames, starting from frame zero and ending on the last frame.
Input
n: Number of images in the quicklook
mindim: The minimum dimension of each of the elements in the montage
animate: If true, return a video constructed by animating the quicklook into a video by showing dt consecutive frames
dt: The number of frames for animation
startframe: The initial frame index to start the n uniformly sampled frames for the quicklook
thumbnail [vipy.image.Image]: If provided, prepend the first element in the montage with this thumbnail. This is useful for showing a high resolution image (e.g. a face, small object) to be contained in the video for review.
Note: The first frame in the upper left is guaranteed to be the start frame of the labeled activity, but the last frame in the bottom right may not be precisely the end frame and may be off by at most len(video)/9.
def quicklook(self, n=9, mindim=256, startframe=0, animate=False, dt=30, thumbnail=None):
    """Generate a montage of n uniformly spaced frames.

    Montage increases rowwise for n uniformly spaced frames, starting from frame zero and ending on the last frame.

    Input:
        n: Number of images in the quicklook
        mindim: The minimum dimension of each of the elements in the montage
        animate: If true, return a video constructed by animating the quicklook into a video by showing dt consecutive frames
        dt: The number of frames for animation
        startframe: The initial frame index to start the n uniformly sampled frames for the quicklook
        thumbnail [`vipy.image.Image`]: If provided, prepend the first element in the montage with this thumbnail.  This is useful for showing a high resolution image (e.g. a face, small object) to be contained in the video for review.

    .. note:: The first frame in the upper left is guaranteed to be the start frame of the labeled activity, but the last frame in the bottom right may not be precisely the end frame and may be off by at most len(video)/9.
    """
    if not self.isloaded():
        self.load()
    if animate:
        return Video(frames=[self.quicklook(n=n, startframe=k, animate=False, dt=dt) for k in range(0, min(dt, len(self)))], framerate=self.framerate())
    framelist = [min(int(np.round(f))+startframe, len(self)-1) for f in np.linspace(0, len(self)-1, n)]
    imframes = [self.frame(k).maxmatte()  # letterbox or pillarbox
                for (j,k) in enumerate(framelist)]
    imframes = [im.savefig(figure=1).rgb() for im in imframes]  # temp storage in memory
    if thumbnail is not None:
        assert isinstance(thumbnail, vipy.image.Image)
        imframes = [thumbnail.maxmatte().mindim(mindim)] + imframes  # prepend
    return vipy.visualize.montage(imframes, imgwidth=mindim, imgheight=mindim)
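A minimal sketch (not from the source), assuming a short local clip at a placeholder path; quicklook() triggers a full load(), so keep the input small.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4').clip(0, 90)   # hypothetical path; ~3 seconds at 30fps
im = v.quicklook(n=9, mindim=128)   # 3x3 montage as a vipy.image.Image
im.savefig('quicklook.jpg')         # write the visualization to disk
```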
def randomcrop(self, shape, withbox=False)
-
Crop the video to shape=(H,W) with random position such that the crop contains only valid pixels, and optionally return the box
def randomcrop(self, shape, withbox=False):
    """Crop the video to shape=(H,W) with random position such that the crop contains only valid pixels, and optionally return the box"""
    assert shape[0] <= self.height() and shape[1] <= self.width()  # triggers preview()
    (xmin, ymin) = (np.random.randint(self.height()-shape[0]), np.random.randint(self.width()-shape[1]))
    bb = vipy.geometry.BoundingBox(xmin=int(xmin), ymin=int(ymin), width=int(shape[1]), height=int(shape[0]))  # may be outside frame
    self.crop(bb, zeropad=True)
    return self if not withbox else (self, bb)
def reload(self)
-
def reload(self):
    return self.clone(flush=True).load()
def relpath(self, parent=None, start=None)
-
Replace the filename with a relative path to parent (or current working directory if none).
Usage:
v = vipy.video.Video(filename='/path/to/dataset/video/category/out.mp4')
v.relpath(parent='/path/to/dataset')
v.filename() == 'video/category/out.mp4'
If the current working directory is /path/to/dataset, and v.load() is called, the filename will be loaded.
Args
parent [str]: A parent path of the current filename to remove and be relative to. If filename is '/path/to/video.mp4' then filename must start with parent, then parent will be removed from filename.
start [str]: Return a relative filename starting from path start='/path/to/dir' that will create a relative path to this filename. If start='/a/b/c' and filename='/a/b/d/e/f.ext' then return filename '../d/e/f.ext'
Returns
This video object with the filename changed to be a relative path
def relpath(self, parent=None, start=None):
    """Replace the filename with a relative path to parent (or current working directory if none).

    Usage:

    ```python
    v = vipy.video.Video(filename='/path/to/dataset/video/category/out.mp4')
    v.relpath(parent='/path/to/dataset')
    v.filename() == 'video/category/out.mp4'
    ```

    If the current working directory is /path/to/dataset, and v.load() is called, the filename will be loaded.

    Args:
        parent [str]: A parent path of the current filename to remove and be relative to.  If filename is '/path/to/video.mp4' then filename must start with parent, then parent will be removed from filename.
        start [str]: Return a relative filename starting from path start='/path/to/dir' that will create a relative path to this filename.  If start='/a/b/c' and filename='/a/b/d/e/f.ext' then return filename '../d/e/f.ext'

    Returns:
        This video object with the filename changed to be a relative path
    """
    assert parent is not None or start is not None
    if parent is not None:
        parent = parent if parent is not None else os.getcwd()
        assert parent in os.path.expanduser(self.filename()), "Parent path '%s' not found in abspath '%s'" % (parent, self.filename())
        self.filename(PurePath(os.path.expanduser(self.filename())).relative_to(parent))
    if start is not None:
        self.filename(os.path.join(os.path.relpath(os.path.expanduser(filepath(self.filename())), start), filetail(self.filename())))
    return self
def rename(self, newname)
-
Move the underlying video file preserving the absolute path, such that if self.filename() == '/a/b/c.ext' and newname='d.ext', then self.filename() -> '/a/b/d.ext', and move the corresponding file
def rename(self, newname):
    """Move the underlying video file preserving the absolute path, such that if self.filename() == '/a/b/c.ext' and newname='d.ext', then self.filename() -> '/a/b/d.ext', and move the corresponding file"""
    newfile = os.path.join(filepath(self.filename()), newname)
    shutil.move(self.filename(), newfile)
    return self.filename(newfile)
def rescale(self, s)
-
Rescale the video by factor s, such that the new dimensions are (s*H, s*W); can only be applied prior to load()
def rescale(self, s):
    """Rescale the video by factor s, such that the new dimensions are (s*H, s*W); can only be applied prior to load()"""
    if s == 1:
        return self
    assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
    self.shape(shape=(int(np.round(self.height()*float(np.ceil(s*1e6)/1e6))), int(np.round(self.width()*float(np.ceil(s*1e6)/1e6)))))  # update the known shape
    self._ffmpeg = self._ffmpeg.filter('scale', 'iw*%1.6f' % float(np.ceil(s*1e6)/1e6), 'ih*%1.6f' % float(np.ceil(s*1e6)/1e6))  # ceil last significant digit to avoid off by one
    return self
def resize(self, rows=None, cols=None, width=None, height=None)
-
Resize the video to be (rows=height, cols=width)
def resize(self, rows=None, cols=None, width=None, height=None):
    """Resize the video to be (rows=height, cols=width)"""
    assert not (rows is not None and height is not None)
    assert not (cols is not None and width is not None)
    rows = rows if rows is not None else height
    cols = cols if cols is not None else width
    if (rows is None and cols is None):
        return self  # no-op, only resize if strictly necessary
    newshape = (rows if rows is not None else int(np.round(self.height()*(cols/self.width()))),
                cols if cols is not None else int(np.round(self.width()*(rows/self.height()))))
    if not self.isloaded():
        # Apply the scale filter:
        #   - Note that multiple calls to resize will process through the filter chain one by one in the order of resizing calls
        #   - This can introduce resizing artifacts if the resize is a downsample followed by an upsample
        #   - One common use case is to downsample a video for tracking, then apply the tracks to the original video
        #   - One approach: downsample the video, track at low resolution, upsample tracks back to the original resolution, then flush the filter chain to load the video at original resolution with no rescaling
        #     >>> vc = tracker(v.clone().mindim(128)).mindim(v.mindim()).clone(flushfilter=True)
        self._ffmpeg = self._ffmpeg.filter('scale', cols if cols is not None else -1, rows if rows is not None else -1)
    else:
        # Do not use self.__iter__() which triggers copy for mutable arrays
        #   self.array(np.stack([Image(array=self._array[k]).resize(rows=rows, cols=cols).array() for k in range(len(self))]), copy=False)
        # Faster: RGB->RGBX to allow for PIL.Image.fromarray() without tobytes() copy, padding faster than np.concatenate()
        #   self.array(np.stack([PIL.Image.fromarray(x, mode='RGBX').resize((cols, rows), resample=PIL.Image.BILINEAR) for x in np.pad(self._array, ((0,0),(0,0),(0,0),(0,1)))])[:,:,:,:-1], copy=False)  # RGB->RGBX->RGB
        # Fastest: padding introduces more overhead than just accepting tobytes(), image size dependent?
        self.array(np.stack([PIL.Image.fromarray(x).resize((newshape[1], newshape[0]), resample=PIL.Image.BILINEAR) for x in np.ascontiguousarray(self._array)]), copy=False)
    self.shape(shape=newshape)  # manually set newshape
    return self
def resolution_of_videofile(self)
-
Return video resolution in (height, width) in pixels (NOT the filter chain), requires ffprobe.
def resolution_of_videofile(self):
    """Return video resolution in (height, width) in pixels (NOT the filter chain), requires ffprobe."""
    p = self.probe()
    assert 'streams' in p and len(p['streams']) > 0  # require at least one stream
    (H,W) = (p['streams'][0]['height'], p['streams'][0]['width'])  # (height, width) in pixels
    return (W,H) if ('tags' in p['streams'][0] and 'rotate' in p['streams'][0]['tags'] and p['streams'][0]['tags']['rotate'] in ['90','270']) else (H,W)
def restore(self, filename)
-
Save the currently stored video as set using
Video.store()
to filename, and set up filename
def restore(self, filename):
    """Save the currently stored video as set using `vipy.video.Video.store` to filename, and set up filename"""
    assert self.hasattribute('__video__'), "Video not stored"
    with open(filename, 'wb') as f:
        f.write(self.attributes['__video__'])
    return self.filename(filename)
def returns(self, r=None)
-
Return the provided value, useful for terminating long fluent chains without returning self
def returns(self, r=None):
    """Return the provided value, useful for terminating long fluent chains without returning self"""
    return r
def rot90ccw(self)
-
Rotate the video 90 degrees counter-clockwise, can only be applied prior to load()
def rot90ccw(self):
    """Rotate the video 90 degrees counter-clockwise, can only be applied prior to load()"""
    assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
    self.shape(shape=(self.width(), self.height()))  # transposed
    self._ffmpeg = self._ffmpeg.filter('transpose', 2)
    return self
def rot90cw(self)
-
Rotate the video 90 degrees clockwise, can only be applied prior to load()
def rot90cw(self):
    """Rotate the video 90 degrees clockwise, can only be applied prior to load()"""
    assert not self.isloaded(), "Filters can only be applied prior to load() - Try calling flush() first"
    self.shape(shape=(self.width(), self.height()))  # transposed
    self._ffmpeg = self._ffmpeg.filter('transpose', 1)
    return self
def sanitize(self)
-
Remove all private keys from the attributes dictionary.
The attributes dictionary is useful storage for arbitrary (key,value) pairs. However, this storage may contain sensitive information that should be scrubbed from the video before serialization. As a general rule, any key of the form '__keyname', prepended by two underscores, is a private key. This is analogous to private or reserved attributes in the python language. Users should reserve these keynames for keys that should be sanitized and removed before any serialization of this object.
assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False
def sanitize(self):
    """Remove all private keys from the attributes dictionary.

    The attributes dictionary is useful storage for arbitrary (key,value) pairs.  However, this storage may contain sensitive information that should be scrubbed from the video before serialization.  As a general rule, any key of the form '__keyname', prepended by two underscores, is a private key.  This is analogous to private or reserved attributes in the python language.  Users should reserve these keynames for keys that should be sanitized and removed before any serialization of this object.

    ```python
    assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False
    ```
    """
    if self._has_private_attribute():
        self.attributes = {k:v for (k,v) in self.attributes.items() if not k.startswith('__')}
    return self
def saveas(self, outfile=None, framerate=None, vcodec='libx264', verbose=False, ignoreErrors=False, flush=False, pause=5)
-
Save video to new output video file. This function does not draw boxes, it saves pixels to a new video file.
Args
outfile
- the absolute path to the output video file. This extension can be .mp4 (for video) or [".webp",".gif"] (for animated image)
ignoreErrors
- if True, then exit gracefully without throwing an exception. Useful for chaining download().saveas() on parallel dataset downloads
flush
- If true, then flush the buffer for this object right after saving the new video. This is useful for transcoding in parallel
framerate
- input framerate of the frames in the buffer, or the output framerate of the transcoded video. If not provided, use framerate of source video
pause
- an integer in seconds to pause between loops of animated images if the outfile is webp or animated gif
Returns
a new video object with this video filename, and a clean video filter chain
Note
- If self.array() is loaded, then export the contents of self._array to the video file
- If self.array() is not loaded, and there exists a valid video file, apply the filter chain directly to the input video
- If outfile==None or outfile==self.filename(), then overwrite the current filename
def saveas(self, outfile=None, framerate=None, vcodec='libx264', verbose=False, ignoreErrors=False, flush=False, pause=5):
    """Save video to new output video file.  This function does not draw boxes, it saves pixels to a new video file.

    Args:
        outfile: the absolute path to the output video file.  This extension can be .mp4 (for video) or [".webp",".gif"] (for animated image)
        ignoreErrors: if True, then exit gracefully without throwing an exception.  Useful for chaining download().saveas() on parallel dataset downloads
        flush: If true, then flush the buffer for this object right after saving the new video.  This is useful for transcoding in parallel
        framerate: input framerate of the frames in the buffer, or the output framerate of the transcoded video.  If not provided, use framerate of source video
        pause: an integer in seconds to pause between loops of animated images if the outfile is webp or animated gif

    Returns:
        a new video object with this video filename, and a clean video filter chain

    .. note::
        - If self.array() is loaded, then export the contents of self._array to the video file
        - If self.array() is not loaded, and there exists a valid video file, apply the filter chain directly to the input video
        - If outfile==None or outfile==self.filename(), then overwrite the current filename
    """
    outfile = tocache(tempMP4()) if outfile is None else os.path.normpath(os.path.abspath(os.path.expanduser(outfile)))
    premkdir(outfile)  # create output directory for this file if not exists
    framerate = framerate if framerate is not None else self._framerate
    assert vipy.util.isvideofile(outfile), "Invalid filename extension for video filename"

    if verbose:
        print('[vipy.video.saveas]: Saving video "%s" ...' % outfile)
    try:
        if iswebp(outfile):
            return self.webp(outfile, pause)
        elif isgif(outfile):
            return self.gif(outfile, pause)
        elif isjsonfile(outfile):
            with open(outfile, 'w') as f:  # open for writing, the JSON encoding is text
                f.write(self.json(encode=True))
            return outfile
        elif self.isloaded():
            # Save numpy() from load() to video, forcing to be even shape
            (n, height, width, channels) = self._array.shape
            process = ffmpeg.input('pipe:', format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(width, height), r=framerate) \
                            .filter('pad', 'ceil(iw/2)*2', 'ceil(ih/2)*2') \
                            .output(filename=outfile, pix_fmt='yuv420p', vcodec=vcodec) \
                            .overwrite_output() \
                            .global_args('-cpuflags', '0', '-loglevel', 'quiet' if not vipy.globals.isdebug() else 'debug') \
                            .run_async(pipe_stdin=True)
            for frame in self._array:
                process.stdin.write(frame.astype(np.uint8).tobytes())
            process.stdin.close()
            process.wait()
        elif (self.isdownloaded() and self._isdirty()) or isRTSPurl(self.filename()) or isRTMPurl(self.filename()):
            # Transcode the video file directly, do not load() then export
            # Requires saving to a tmpfile if the output filename is the same as the input filename
            tmpfile = '%s.tmp%s' % (filefull(outfile), fileext(outfile)) if outfile == self.filename() else outfile
            self._ffmpeg.filter('pad', 'ceil(iw/2)*2', 'ceil(ih/2)*2') \
                        .output(filename=tmpfile, pix_fmt='yuv420p', vcodec=vcodec, r=framerate) \
                        .overwrite_output() \
                        .global_args('-cpuflags', '0', '-loglevel', 'quiet' if not vipy.globals.isdebug() else 'debug') \
                        .run()
            if outfile == self.filename():
                if os.path.exists(self.filename()):
                    os.remove(self.filename())
                shutil.move(tmpfile, self.filename())
        elif self.hasfilename() and not self._isdirty():
            shutil.copyfile(self.filename(), outfile)
        elif self.hasurl() and not self.hasfilename():
            raise ValueError('Input video url "%s" not downloaded, call download() first' % self.url())
        elif not self.hasfilename():
            raise ValueError('Input video file not found "%s"' % self.filename())
        else:
            raise ValueError('saveas() failed')
    except Exception as e:
        if ignoreErrors:
            # useful for saving a large number of videos in parallel where some failed download
            print('[vipy.video.saveas]: Failed with error "%s" - Returning empty video' % str(repr(e)))
        else:
            raise

    # Return a new video, cloned from this video with the new video file, optionally flush the video we loaded before returning
    return self.clone(flushforward=True, flushfilter=True, flushbackward=flush).filename(outfile).nourl()
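A minimal transcode sketch (not from the source), assuming local placeholder paths; the filter chain (here a resize) is applied while writing the new file.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/input.mp4')   # hypothetical path
vout = v.mindim(480).saveas('/path/to/output.mp4')    # transcode with the resize applied
print(vout.filename())                                # new video object with a clean filter chain
```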
def savetemp(self)
-
Alias for
Video.savetmp()
def savetemp(self):
    """Alias for `vipy.video.Video.savetmp`"""
    return self.savetmp()
def savetmp(self)
-
Call
Video.saveas()
using a new temporary video file, and return the video object with this new filename
def savetmp(self):
    """Call `vipy.video.Video.saveas` using a new temporary video file, and return the video object with this new filename"""
    return self.saveas(outfile=tempMP4())
def setattribute(self, k, v=None)
-
def setattribute(self, k, v=None):
    if self.attributes is None:
        self.attributes = {}
    self.attributes[k] = v
    return self
def shape(self, shape=None, probe=False)
-
Return (height, width) of the frames, requires loading a preview frame from the video if the video is not already loaded, or providing the shape=(height,width) by the user
def shape(self, shape=None, probe=False):
    """Return (height, width) of the frames, requires loading a preview frame from the video if the video is not already loaded, or providing the shape=(height,width) by the user"""
    if probe:
        # Set the shape of the video from the filename by ffprobe, this should be deprecated
        return self.shape(self.probeshape(), probe=False)
    elif shape is not None:
        # Set the shape of the video using the shape provided by the user (e.g. sometimes the user knows what this will be)
        assert isinstance(shape, tuple), "shape=(height, width) tuple"
        self._shape = shape
        self._channels = self.channels()
        #self._previewhash = hashlib.md5(str(self._ffmpeg_commandline()).encode()).hexdigest()
        return self
    elif not self.isloaded():
        # Preview a frame from the ffmpeg filter chain (more expensive)
        if self._shape is None or len(self._shape) == 0:  # dirty filter chain
            im = self.preview()  # ffmpeg chain changed, load a single frame of video, triggers fetch
            self._shape = (im.height(), im.width())  # cache the shape
            self._channels = im.channels()
            #self._previewhash = previewhash
        return self._shape
    else:
        # Frames already loaded - get shape from numpy array
        return (self._array.shape[1], self._array.shape[2])
def show(self, verbose=False, notebook=False, ffplay=False)
-
Alias for play
def show(self, verbose=False, notebook=False, ffplay=False):
    """Alias for play"""
    return self.play(verbose=verbose, notebook=notebook, ffplay=ffplay)
def speed(self, s)
-
Change the speed by a multiplier s. If s=1, this will be the same speed, s=0.5 for half-speed (slower playback), s=2 for double-speed (faster playback)
def speed(self, s):
    """Change the speed by a multiplier s.  If s=1, this will be the same speed, s=0.5 for half-speed (slower playback), s=2 for double-speed (faster playback)"""
    assert s > 0, "Invalid input"
    self._ffmpeg = self._ffmpeg.setpts('%1.3f*PTS' % float(1.0/float(s)))
    return self
def store(self)
-
Store the current video file as an attribute of this object.
Useful for archiving an object to be fully self contained without any external references.
v == v.store().restore(v.filename())
Note
- Remove this stored video using unstore()
- Unpack this stored video and set up the video chains using restore()
- This method is more efficient than load() followed by pkl(), as it stores the encoded video as a byte string.
- Useful for creating a single self contained object for distributed processing.
def store(self):
    """Store the current video file as an attribute of this object.

    Useful for archiving an object to be fully self contained without any external references.

    ```python
    v == v.store().restore(v.filename())
    ```

    .. note::
        - Remove this stored video using unstore()
        - Unpack this stored video and set up the video chains using restore()
        - This method is more efficient than load() followed by pkl(), as it stores the encoded video as a byte string.
        - Useful for creating a single self contained object for distributed processing.
    """
    assert self.hasfilename(), "Video file not found.  Try saveas() first to create a video file to store."
    with open(self.filename(), 'rb') as f:
        self.attributes['__video__'] = f.read()
    return self
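A minimal archiving sketch (not from the source), assuming local placeholder paths: store() embeds the encoded bytes in the object so it can be pickled and moved, and restore() writes them back out.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4').store()   # hypothetical path; bytes now in v.attributes['__video__']
v.pkl('/path/to/archive.pkl')                                 # self-contained pickle
v.restore('/path/to/unpacked.mp4')                            # write the stored bytes to a new file and point the object at it
```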
def stream(self, write=False, overwrite=False, queuesize=512, bitrate=None, buffered=False, rebuffered=False, bufsize=256)
-
Iterator to yield groups of frames streaming from video.
A video stream is a real time iterator to read or write from a video. Streams are useful to group together frames into clips that are operated on as a group.
The following use cases are supported:
v = vipy.video.RandomScene()
Stream individual video frames lagged by 10 frames and 20 frames
for (im1, im2) in zip(v.stream().frame(n=-10), v.stream().frame(n=-20)): print(im1, im2)
Stream overlapping clips such that each clip is a video n=16 frames long and starts at frame i, and the next clip is n=16 frames long and starts at frame i=i+m
for vc in v.stream().clip(n=16, m=4): print(vc)
Stream non-overlapping batches of frames such that each clip is a video of length n and starts at frame i, and the next clip is length n and starts at frame i+n
for vb in v.stream().batch(n=16): print(vb)
Create a write stream to incrementally add frames to long video.
vi = vipy.video.Video(filename='/path/to/input.mp4')
vo = vipy.video.Video(filename='/path/to/output.mp4')
with vo.stream(write=True) as s:
    for im in vi.stream():
        s.write(im)  # manipulate pixels of im, if desired
Create a 480p YouTube live stream from an RTSP camera at 5Hz
vo = vipy.video.Scene(url='rtmp://a.rtmp.youtube.com/live2/$SECRET_STREAM_KEY')
vi = vipy.video.Scene(url='rtsp://URL').framerate(5)
with vo.framerate(5).stream(write=True, bitrate='1000k') as s:
    for im in vi.framerate(5).resize(cols=854, rows=480):
        s.write(im)
Args
write
- [bool] If true, create a write stream
overwrite
- [bool] If true, and the video output filename already exists, overwrite it
queuesize
- [int] The maximum queue size for the ffmpeg pipe thread in the primary iterator. The queue size is the maximum number of pre-fetched frames from the ffmpeg pipe. This should be big enough that you are never waiting for queue fills
bitrate
- [str] The ffmpeg bitrate of the output encoder for writing, written like '2000k'
bufsize
- [int] The maximum size of the stream buffer in frames. The stream buffer length should be big enough so that all iterators can yield before deleting old frames
Returns
A Stream object
Note: Using this iterator may affect PDB debugging due to stdout/stdin redirection. Use ipdb instead.
def stream(self, write=False, overwrite=False, queuesize=512, bitrate=None, buffered=False, rebuffered=False, bufsize=256):
    """Iterator to yield groups of frames streaming from video.

    A video stream is a real time iterator to read or write from a video.  Streams are useful to group together frames into clips that are operated on as a group.

    The following use cases are supported:

    ```python
    v = vipy.video.RandomScene()
    ```

    Stream individual video frames lagged by 10 frames and 20 frames

    ```python
    for (im1, im2) in zip(v.stream().frame(n=-10), v.stream().frame(n=-20)):
        print(im1, im2)
    ```

    Stream overlapping clips such that each clip is a video n=16 frames long and starts at frame i, and the next clip is n=16 frames long and starts at frame i=i+m

    ```python
    for vc in v.stream().clip(n=16, m=4):
        print(vc)
    ```

    Stream non-overlapping batches of frames such that each clip is a video of length n and starts at frame i, and the next clip is length n and starts at frame i+n

    ```python
    for vb in v.stream().batch(n=16):
        print(vb)
    ```

    Create a write stream to incrementally add frames to long video.

    ```python
    vi = vipy.video.Video(filename='/path/to/input.mp4')
    vo = vipy.video.Video(filename='/path/to/output.mp4')
    with vo.stream(write=True) as s:
        for im in vi.stream():
            s.write(im)  # manipulate pixels of im, if desired
    ```

    Create a 480p YouTube live stream from an RTSP camera at 5Hz

    ```python
    vo = vipy.video.Scene(url='rtmp://a.rtmp.youtube.com/live2/$SECRET_STREAM_KEY')
    vi = vipy.video.Scene(url='rtsp://URL').framerate(5)
    with vo.framerate(5).stream(write=True, bitrate='1000k') as s:
        for im in vi.framerate(5).resize(cols=854, rows=480):
            s.write(im)
    ```

    Args:
        write: [bool] If true, create a write stream
        overwrite: [bool] If true, and the video output filename already exists, overwrite it
        queuesize: [int] The maximum queue size for the ffmpeg pipe thread in the primary iterator.  The queue size is the maximum number of pre-fetched frames from the ffmpeg pipe.  This should be big enough that you are never waiting for queue fills
        bitrate: [str] The ffmpeg bitrate of the output encoder for writing, written like '2000k'
        bufsize: [int] The maximum size of the stream buffer in frames.  The stream buffer length should be big enough so that all iterators can yield before deleting old frames

    Returns:
        A Stream object

    .. note:: Using this iterator may affect PDB debugging due to stdout/stdin redirection.  Use ipdb instead.
    """
    return Stream(self, queuesize=queuesize, write=write, overwrite=overwrite, bitrate=bitrate, buffered=buffered, rebuffered=rebuffered, bufsize=bufsize)  # do not clone
def take(self, n)
-
Return n frames from the clip uniformly spaced as numpy array
Args
n
- Integer number of uniformly spaced frames to return
Returns
A numpy array of shape (n,H,W,C)
Warning: This assumes that the entire video is loaded into memory (e.g. call
Video.load()
). Use with caution.
def take(self, n):
    """Return n frames from the clip uniformly spaced as numpy array

    Args:
        n: Integer number of uniformly spaced frames to return

    Returns:
        A numpy array of shape (n,H,W,C)

    .. warning:: This assumes that the entire video is loaded into memory (e.g. call `vipy.video.Video.load`).  Use with caution.
    """
    assert self.isloaded(), "load() is required before take()"
    dt = int(np.round(len(self._array) / float(n)))  # stride
    return self._array[::dt][0:n]
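A minimal sketch (not from the source), assuming a loaded clip at a placeholder path; note that when n does not divide the video length evenly, the stride rounding can return fewer than n frames.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4').clip(0, 64).load()   # hypothetical path
frames = v.take(8)     # 8 uniformly strided frames
print(frames.shape)    # (8, H, W, C)
```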
def thumbnail(self, outfile=None, frame=0)
-
Return annotated frame=k of video, save annotation visualization to provided outfile.
This is functionally equivalent to
Video.frame()
with an additional outfile argument to easily save an annotated thumbnail image.
Args
outfile
- [str] an optional outfile to save the annotated frame
frame
- [int >= 0] The frame to output the thumbnail
Returns
A
Image
object for frame k.
def thumbnail(self, outfile=None, frame=0):
    """Return annotated frame=k of video, save annotation visualization to provided outfile.

    This is functionally equivalent to `vipy.video.Video.frame` with an additional outfile argument to easily save an annotated thumbnail image.

    Args:
        outfile: [str] an optional outfile to save the annotated frame
        frame: [int >= 0] The frame to output the thumbnail

    Returns:
        A `vipy.image.Image` object for frame k.
    """
    im = self.frame(frame, img=self.preview(frame).array())
    return im.savefig(outfile) if outfile is not None else im
def tocache(self)
-
Call
Video.saveas()
using a new temporary video file in the VIPY cache, and return the video object with this new filename
def tocache(self):
    """Call `vipy.video.Video.saveas` using a new temporary video file in the VIPY cache, and return the video object with this new filename"""
    return self.saveas(outfile=vipy.util.tocache(tempMP4()))
def tonumpy(self)
-
Alias for numpy()
def tonumpy(self):
    """Alias for numpy()"""
    return self.numpy()
def torch(self, startframe=0, endframe=None, length=None, stride=1, take=None, boundary='repeat', order='nchw', verbose=False, withslice=False, scale=1.0, withlabel=False, nonelabel=False)
-
Convert the loaded video of shape NxHxWxC frames to an MxCxHxW torch tensor.
Args
startframe
- [int >= 0] The start frame of the loaded video to use for constructing the torch tensor
endframe
- [int >= 0] The end frame of the loaded video to use for constructing the torch tensor
length
- [int >= 0] The length of the torch tensor if endframe is not provided.
stride
- [int >= 1] The temporal stride in frames. This is the number of frames to skip.
take
- [int >= 0] The number of uniformly spaced frames to include in the tensor.
boundary
- ['repeat', 'cyclic'] The boundary handling for when the requested tensor slice goes beyond the end of the video
order
- ['nchw', 'nhwc', 'chwn', 'cnhw'] The axis ordering of the returned torch tensor N=number of frames (batchsize), C=channels, H=height, W=width
verbose
- [bool] Print out the slice used for constructing the tensor
withslice
- [bool] Return a tuple (tensor, slice) that includes the slice used to construct the tensor. Useful for data provenance.
scale
- [float] An optional scale factor to apply to the tensor. Useful for converting [0,255] -> [0,1]
withlabel
- [bool] Return a tuple (tensor, labels) that includes the N framewise activity labels.
nonelabel
- [bool] returns tuple (t, None) if withlabel=False
Returns
Returns a torch float tensor, analogous to torchvision.transforms.ToTensor()
Returns (tensor, slice) if withslice=True (withslice takes precedence)
Returns (tensor, labellist) if withlabel=True
Notes
- This triggers a load() of the video
- The precedence of arguments is (startframe, endframe) or (startframe, startframe+length), then stride and take.
- Follows numpy slicing rules. Optionally return the slice used if withslice=True
def torch(self, startframe=0, endframe=None, length=None, stride=1, take=None, boundary='repeat', order='nchw', verbose=False, withslice=False, scale=1.0, withlabel=False, nonelabel=False):
    """Convert the loaded video of shape NxHxWxC frames to an MxCxHxW torch tensor.

    Args:
        startframe: [int >= 0] The start frame of the loaded video to use for constructing the torch tensor
        endframe: [int >= 0] The end frame of the loaded video to use for constructing the torch tensor
        length: [int >= 0] The length of the torch tensor if endframe is not provided.
        stride: [int >= 1] The temporal stride in frames.  This is the number of frames to skip.
        take: [int >= 0] The number of uniformly spaced frames to include in the tensor.
        boundary: ['repeat', 'cyclic'] The boundary handling for when the requested tensor slice goes beyond the end of the video
        order: ['nchw', 'nhwc', 'chwn', 'cnhw'] The axis ordering of the returned torch tensor N=number of frames (batchsize), C=channels, H=height, W=width
        verbose: [bool] Print out the slice used for constructing the tensor
        withslice: [bool] Return a tuple (tensor, slice) that includes the slice used to construct the tensor.  Useful for data provenance.
        scale: [float] An optional scale factor to apply to the tensor.  Useful for converting [0,255] -> [0,1]
        withlabel: [bool] Return a tuple (tensor, labels) that includes the N framewise activity labels.
        nonelabel: [bool] returns tuple (t, None) if withlabel=False

    Returns:
        Returns a torch float tensor, analogous to torchvision.transforms.ToTensor()
        Returns (tensor, slice) if withslice=True (withslice takes precedence)
        Returns (tensor, labellist) if withlabel=True

    .. notes::
        - This triggers a load() of the video
        - The precedence of arguments is (startframe, endframe) or (startframe, startframe+length), then stride and take.
        - Follows numpy slicing rules.  Optionally return the slice used if withslice=True
    """
    try_import('torch'); import torch
    frames = self.load().numpy() if self.load().numpy().ndim == 4 else np.expand_dims(self.load().numpy(), 3)  # NxHxWx(C=1, C=3)
    assert boundary in ['repeat', 'strict', 'cyclic'], "Invalid boundary mode - must be in ['repeat', 'strict', 'cyclic']"

    # Slice index (i=start (zero-indexed), j=end (non-inclusive), k=step)
    (i,j,k) = (startframe, endframe, stride)
    if startframe == 'random':
        assert length is not None, "Random start frame requires fixed length"
        i = max(0, np.random.randint(len(frames)-length+1))
    if endframe is not None:
        assert length is None, "Cannot specify both endframe and length"
        assert endframe > startframe, "End frame must be greater than start frame"
        (j,k) = (endframe, 1)
    if length is not None:
        assert endframe is None, "Cannot specify both endframe and length"
        assert length >= 0, "Length must be positive"
        (j,k) = (i+length, 1)
    if length is None and endframe is None:
        j = len(frames)  # use them all
    if stride != 1:
        assert take is None, "Cannot specify both take and stride"
        assert stride >= 1, "Stride must be >= 1"
        k = stride
    if take is not None:
        # Uniformly sampled frames to result in len(frames)=take
        assert stride == 1, "Cannot specify both take and stride"
        assert take <= len(frames), "Take must be less than the number of frames"
        k = int(np.ceil(len(frames)/float(take)))

    # Boundary handling
    assert i >= 0, "Start frame must be >= 0"
    assert i < j, "Start frame must be less than end frame"
    assert k <= len(frames), "Stride must be <= len(frames)"
    n = len(frames)  # true video length for labels
    if boundary == 'repeat' and j > len(frames):
        for d in range(j-len(frames)):
            frames = np.concatenate((frames, np.expand_dims(frames[-1], 0)))
    elif boundary == 'cyclic' and j > len(frames):
        for d in range(j-len(frames)):
            frames = np.concatenate((frames, np.expand_dims(frames[j % len(frames)], 0)))
    assert j <= len(frames), "invalid slice=%s for frame shape=%s" % (str((i,j,k)), str(frames.shape))
    if verbose:
        print('[vipy.video.torch]: slice (start,end,step)=%s for frame shape (N,C,H,W)=%s' % (str((i,j,k)), str(frames.shape)))

    # Slice and transpose to torch tensor axis ordering
    t = torch.from_numpy(frames[i:j:k] if (k!=1 or i!=0 or j!=len(frames)) else frames)  # do not copy - this shares the numpy buffer of the video, be careful!
    if t.dim() == 2:
        t = t.unsqueeze(0).unsqueeze(-1)  # HxW -> (N=1)xHxWx(C=1)
    if order == 'nchw':
        t = t.permute(0,3,1,2)  # NxCxHxW, view
    elif order == 'nhwc':
        pass  # NxHxWxC (native numpy order)
    elif order == 'cnhw' or order == 'cdhw':
        t = t.permute(3,0,1,2)  # CxNxHxW == CxDxHxW (for torch conv3d), view
    elif order == 'chwn':
        t = t.permute(3,1,2,0)  # CxHxWxN, view
    else:
        raise ValueError("Invalid order = must be in ['nchw', 'nhwc', 'chwn', 'cnhw']")

    # Scaling (optional)
    if scale is not None and self.colorspace() != 'float':
        t = (1.0/255.0)*t  # [0,255] -> [0,1]
    elif scale is not None and scale != 1.0:
        t = scale*t

    # Return tensor or (tensor, slice) or (tensor, labels)
    if withslice:
        return (t, (i,j,k))
    elif withlabel:
        labels = [sorted(tuple(self.activitylabels((f%n) if boundary == 'cyclic' else min(f, n-1)))) for f in range(i,j,k)]
        return (t, labels)
    elif nonelabel:
        return (t, None)
    else:
        return t
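A minimal sketch (not from the source) producing a fixed-length clip tensor for a PyTorch model, assuming torch is installed and a placeholder local path.

```python
import vipy.video

v = vipy.video.Video(filename='/path/to/video.mp4').clip(0, 64).mindim(128)   # hypothetical path
t = v.torch(startframe=0, length=16, order='nchw')   # triggers load(); float tensor scaled to [0,1]
print(t.shape)   # torch.Size([16, 3, H, W])
```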
def unstore(self)
-
Delete the currently stored video from `vipy.video.Video.store`
Expand source code Browse git
def unstore(self):
    """Delete the currently stored video from `vipy.video.Video.store`"""
    return self.delattribute('__video__')
def url(self, url=None, username=None, password=None, sha1=None)
-
Video URL and URL download properties
Expand source code Browse git
def url(self, url=None, username=None, password=None, sha1=None):
    """Video URL and URL download properties"""
    if url is not None:
        self._url = url  # note that this does not change anything else, better to use the constructor for this
    if url is not None and (isRTSPurl(url) or isRTMPurl(url)):
        self.filename(self._url)
    if username is not None:
        self._urluser = username  # basic authentication
    if password is not None:
        self._urlpassword = password  # basic authentication
    if sha1 is not None:
        self._urlsha1 = sha1  # file integrity
    if url is None and username is None and password is None and sha1 is None:
        return self._url
    else:
        return self
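A short sketch of the getter/setter behavior (URL and credentials are illustrative placeholders):

v = vipy.video.Video(url='https://example.com/video.mp4')  # hypothetical URL
print(v.url())                            # with no arguments, returns the URL string
v = v.url(username='me', password='...')  # with arguments, sets properties and returns self for chaining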
def videoid(self, newid=None)
-
Return a unique video identifier for this video, as specified in the 'video_id' attribute, or by SHA1 hash of the Video.filename() and Video.url().
Args
newid
- [str] If not None, then update the video_id as newid.
Returns
The video ID if newid=None else self
Note
- If the video filename changes (e.g. from transformation), and video_id is not set in self.attributes, then the video ID will change.
- If a video does not have a filename or URL or a video ID in the attributes, then this will return None
- To preserve a video ID independent of transformations, set self.setattribute('video_id', ${MY_ID}), or pass in newid
Expand source code Browse git
def videoid(self, newid=None):
    """Return a unique video identifier for this video, as specified in the 'video_id' attribute, or by SHA1 hash of the `vipy.video.Video.filename` and `vipy.video.Video.url`.

    Args:
        newid: [str] If not None, then update the video_id as newid.

    Returns:
        The video ID if newid=None else self

    .. note::
        - If the video filename changes (e.g. from transformation), and video_id is not set in self.attributes, then the video ID will change.
        - If a video does not have a filename or URL or a video ID in the attributes, then this will return None
        - To preserve a video ID independent of transformations, set self.setattribute('video_id', ${MY_ID}), or pass in newid
    """
    if newid is not None:
        self.setattribute('video_id', newid)
        return self
    else:
        return self.attributes['video_id'] if 'video_id' in self.attributes else (hashlib.sha1(str(str(self.filename())+str(self.url())).encode("UTF-8")).hexdigest() if (self.filename() is not None or self.url() is not None) else None)
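A sketch of how to pin an ID so it survives transformations (filename and ID are illustrative):

v = vipy.video.Video(filename='video.mp4')  # hypothetical local file
print(v.videoid())                          # default: SHA1 hash of filename+URL, changes if the file is renamed
v = v.videoid(newid='my_stable_id')         # pin a persistent ID in self.attributes
assert v.videoid() == 'my_stable_id'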
def webp(self, outfile=None, pause=3, strict=True, smallest=False, smaller=False, framerate=None)
-
Save a video to an animated WEBP file, with pause=N seconds on the last frame between loops.
Args
strict
- If true, assert that the filename must have a .webp extension
pause
- Integer seconds to pause between loops of the animation
smallest
- if true, create the smallest possible file but takes much longer to run
smaller
- If true, create a smaller file, which takes a little longer to run
framerate [float]: The output framerate of the webp file. The default is the framerate of the video.
Returns
The filename of the webp file for this video
Warning: This may be slow for very long or large videos
Expand source code Browse git
def webp(self, outfile=None, pause=3, strict=True, smallest=False, smaller=False, framerate=None):
    """Save a video to an animated WEBP file, with pause=N seconds on the last frame between loops.

    Args:
        strict: If true, assert that the filename must have a .webp extension
        pause: Integer seconds to pause between loops of the animation
        smallest: If true, create the smallest possible file but takes much longer to run
        smaller: If true, create a smaller file, which takes a little longer to run
        framerate: [float] The output framerate of the webp file.  The default is the framerate of the video.

    Returns:
        The filename of the webp file for this video

    .. warning:: This may be slow for very long or large videos
    """
    outfile = vipy.util.tempWEBP() if outfile is None else outfile
    assert strict is False or iswebp(outfile)
    outfile = os.path.normpath(os.path.abspath(os.path.expanduser(outfile)))
    framerate = framerate if framerate is not None else self._framerate
    self.load().frame(0).pil().save(outfile,
                                    loop=0,
                                    save_all=True,
                                    method=6 if smallest else 3 if smaller else 0,
                                    append_images=[self.frame(k).pil() for k in range(1, len(self))],
                                    duration=[int(1000.0/framerate) for k in range(0, len(self)-1)] + [pause*1000])
    return outfile
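A minimal usage sketch (filenames are illustrative placeholders):

v = vipy.video.Video(filename='video.mp4')     # hypothetical local file
f = v.webp('out.webp', pause=2, smaller=True)  # triggers load(); returns the output filename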
def width(self)
-
Width (cols) in pixels of the video for the current filter chain
Expand source code Browse git
def width(self):
    """Width (cols) in pixels of the video for the current filter chain"""
    return self.shape()[1]
def zeropad(self, padwidth, padheight)
-
Zero pad the video with padwidth columns before and after, and padheight rows before and after
Notes: Older FFMPEG implementations can throw the error "Input area #:#:#:# not within the padded area #:#:#:# or zero-sized"; this is often caused by odd-sized padding.
Recommend calling self.cropeven().zeropad(…) to avoid this
Expand source code Browse git
def zeropad(self, padwidth, padheight):
    """Zero pad the video with padwidth columns before and after, and padheight rows before and after

    .. notes:: Older FFMPEG implementations can throw the error "Input area #:#:#:# not within the padded area #:#:#:# or zero-sized"; this is often caused by odd-sized padding.  Recommend calling self.cropeven().zeropad(...) to avoid this.
    """
    assert isinstance(padwidth, int) and isinstance(padheight, int)
    if not self.isloaded():
        self.shape(shape=(self.height()+2*padheight, self.width()+2*padwidth))  # manually set shape to avoid preview
        self._ffmpeg = self._ffmpeg.filter('pad', 'iw+%d' % (2*padwidth), 'ih+%d' % (2*padheight), '%d' % padwidth, '%d' % padheight)
    elif padwidth > 0 or padheight > 0:
        self.array(np.pad(self.array(), ((0,0), (padheight,padheight), (padwidth,padwidth), (0,0)), mode='constant'), copy=False)  # this is very expensive, since np.pad() must copy (once in np.pad >= 1.17)
    return self
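A minimal sketch (filename is an illustrative placeholder; for an unloaded video this appends a lazy ffmpeg pad filter rather than copying pixels):

v = vipy.video.Video(filename='video.mp4')  # hypothetical local file
v = v.zeropad(10, 10)                       # 10 pixel zero border on left/right and top/bottom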
def zeropadlike(self, width, height)
-
Zero pad the video balancing the border so that the resulting video size is (width, height).
Expand source code Browse git
def zeropadlike(self, width, height):
    """Zero pad the video balancing the border so that the resulting video size is (width, height)."""
    assert width >= self.width() and height >= self.height(), "Invalid input - final (width=%d, height=%d) must be greater than current image size (width=%d, height=%d)" % (width, height, self.width(), self.height())
    assert int(np.floor((width - self.width())/2)) == int(np.ceil((width - self.width())/2)), "Zero pad must be symmetric, this is often due to odd zeropadding which ffmpeg doesn't like.  Try changing the width +/- 1 pixel"
    assert int(np.floor((height - self.height())/2)) == int(np.ceil((height - self.height())/2)), "Zero pad must be symmetric, this is often due to odd zeropadding which ffmpeg doesn't like.  Try changing the height +/- 1 pixel"
    return self.zeropad(int(np.floor((width - self.width())/2)), int(np.floor((height - self.height())/2)))
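A short sketch (filename is illustrative; note the width/height deltas must be even so the border is symmetric):

v = vipy.video.Video(filename='video.mp4')      # hypothetical local file
v = v.zeropadlike(v.width()+2, v.height()+4)    # 1 pixel left/right, 2 pixels top/bottom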
def zeros(self)
-
Set all pixels of the loaded video to zero, preserving shape and length
Expand source code Browse git
def zeros(self):
    self._array = 0*self.load()._array  # triggers load(), zeros the pixel buffer in place
    return self
class VideoCategory (filename=None, url=None, framerate=30.0, attributes=None, category=None, array=None, colorspace=None, startframe=None, endframe=None, startsec=None, endsec=None)
-
vipy.video.VideoCategory class
A VideoCategory is a video with an associated category, such as an activity class. This class includes all of the constructors of vipy.video.Video along with the ability to extract a clip based on frames or seconds.
Expand source code Browse git
class VideoCategory(Video):
    """vipy.video.VideoCategory class

    A VideoCategory is a video with an associated category, such as an activity class.  This class includes all of the constructors of vipy.video.Video along with the ability to extract a clip based on frames or seconds.
    """
    def __init__(self, filename=None, url=None, framerate=30.0, attributes=None, category=None, array=None, colorspace=None, startframe=None, endframe=None, startsec=None, endsec=None):
        super().__init__(url=url, filename=filename, framerate=framerate, attributes=attributes, array=array, colorspace=colorspace, startframe=startframe, endframe=endframe, startsec=startsec, endsec=endsec)
        self._category = category

    @classmethod
    def from_json(cls, s):
        d = json.loads(s) if not isinstance(s, dict) else s
        d = {k.lstrip('_'):v for (k,v) in d.items()}  # prettyjson (remove "_" prefix to attributes)
        v = super().from_json(s)
        v._category = d['category']
        return v

    def __repr__(self):
        strlist = []
        if self.isloaded():
            strlist.append("height=%d, width=%d, frames=%d" % (self._array[0].shape[0], self._array[0].shape[1], len(self._array)))
        if self.filename() is not None:
            strlist.append('filename="%s"' % self.filename())
        if self.hasurl():
            strlist.append('url="%s"' % self.url())
        if self.category() is not None:
            strlist.append('category="%s"' % self.category())
        if not self.isloaded() and self._startframe is not None and self._endframe is not None:
            strlist.append('clip=(%d,%d)' % (self._startframe, self._endframe))
        if not self.isloaded() and self._startframe is not None and self._endframe is None:
            strlist.append('clip=(%d,)' % (self._startframe))
        return str('<vipy.video.VideoCategory: %s>' % (', '.join(strlist)))

    def json(self, encode=True):
        d = json.loads(super().json())
        d['category'] = self._category
        return json.dumps(d) if encode else d

    def category(self, c=None):
        if c is None:
            return self._category
        else:
            self._category = c
            return self
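A minimal usage sketch (filename and labels are illustrative placeholders):

v = vipy.video.VideoCategory(filename='video.mp4', category='walking')  # hypothetical file and label
print(v.category())       # 'walking'
v = v.category('running') # setter returns self for chaining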
Ancestors
Video
Subclasses
Methods
def category(self, c=None)
-
The category of this video. Returns the category string if c=None, else sets the category to c and returns self.
Expand source code Browse git
def category(self, c=None):
    if c is None:
        return self._category
    else:
        self._category = c
        return self
Inherited members
Video
abspath
array
aspect_ratio
bias
bytes
canload
cast
centercrop
centersquare
channels
channelshape
clear
clearattributes
clip
cliprange
clone
colorspace
commandline
concatenate
crop
cropeven
dict
download
downloadif
duration
duration_in_frames
duration_in_frames_of_videofile
duration_in_seconds_of_videofile
fetch
ffplay
filename
filesize
fliplr
flipud
flush
flush_and_return
frame
frame_meta
framerate
framerate_of_videofile
frames
from_json
fromarray
fromdirectory
fromframes
gain
getattribute
gif
hasattribute
hasfilename
hasurl
height
iframes
iscolor
isdownloaded
isgrayscale
isloadable
isloaded
issquare
json
load
map
maxdim
maxmatte
maxsquare
metadata
metaframe
mindim
minsquare
mutable
normalize
nourl
numpy
pad
pkl
pklif
play
preview
print
printif
probe
probeshape
quicklook
randomcrop
relpath
rename
rescale
resize
resolution_of_videofile
restore
returns
rot90ccw
rot90cw
sanitize
saveas
savetemp
savetmp
shape
show
speed
store
stream
take
thumbnail
tocache
tonumpy
torch
unstore
url
videoid
webp
width
zeropad
zeropadlike