Module vipy.data.meva
import os
from vipy.util import remkdir, readjson, readyaml, findyaml, findvideo, filetail, findjson, filebase, readlist, groupbyasdict, save, flatlist, isstring, tempdir, readcsv, delpath, temphtml, tolist
from vipy.video import VideoCategory, Scene
from vipy.object import Track
from vipy.activity import Activity
from vipy.geometry import BoundingBox
from vipy.show import colorlist
import numpy as np
import warnings
import shutil
import vipy.globals
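# Mapping from MEVA activity category to the short label displayed when visualizing annotated videos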
d_category_to_shortlabel = {'person_abandons_package':'Abandoning',
'person_closes_facility_door':'Closing',
'person_closes_trunk':'Closing trunk',
'person_closes_vehicle_door':'Closing door',
'person_embraces_person':'Hugging',
'person_enters_scene_through_structure':'Entering',
'person_enters_vehicle':'Entering',
'person_exits_scene_through_structure':'Exiting',
'person_exits_vehicle':'Exiting',
'hand_interacts_with_person':'Using hand',
'person_carries_heavy_object':'Carrying',
'person_interacts_with_laptop':'Using laptop',
'person_loads_vehicle':'Loading',
'person_transfers_object':'Transferring',
'person_opens_facility_door':'Opening door',
'person_opens_trunk':'Opening trunk',
'person_opens_vehicle_door':'Opening door',
'person_talks_to_person':'Talking to',
'person_picks_up_object':'Picking up',
'person_purchases':'Purchasing',
'person_reads_document':'Reading',
'person_rides_bicycle':'Riding',
'person_puts_down_object':'Putting down',
'person_sits_down':'Sitting',
'person_stands_up':'Standing',
'person_talks_on_phone':'Talking',
'person_texts_on_phone':'Texting',
'person_steals_object':'Stealing',
'person_unloads_vehicle':'Unloading',
'vehicle_drops_off_person':'Dropping off',
'vehicle_picks_up_person':'Picking up',
'vehicle_reverses':'Reversing',
'vehicle_starts':'Starting',
'vehicle_stops':'Stopping',
'vehicle_turns_left':'Turning left',
'vehicle_turns_right':'Turning right',
'vehicle_makes_u_turn':'Turning around'}
class KF1(object):
def __init__(self, videodir, repodir, contrib=False, stride=1, verbose=True, n_videos=None, withprefix=None, d_category_to_shortlabel=None, merge=False, actor=False, disjoint=False, unpad=False):
"""Parse MEVA annotations (http://mevadata.org) for Known Facility 1 dataset into vipy.video.Scene() objects
Kwiver packet format: https://gitlab.kitware.com/meva/meva-data-repo/blob/master/documents/KPF-specification-v4.pdf
Args:
videodir: [str] path to the directory containing 'drop-01'
repodir: [str] path to the directory containing a clone of https://gitlab.kitware.com/meva/meva-data-repo
stride: [int] the temporal stride in frames for importing bounding boxes; vipy will perform linear interpolation and boundary handling between keyframes
n_videos: [int] only return the first n_videos videos, useful for debugging or for previewing the dataset
withprefix: [list] only return videos whose filename contains one of the strings in the withprefix list, useful for debugging
contrib: [bool] include the noisy contrib annotations from DIVA performers
d_category_to_shortlabel: [dict] a dictionary mapping category names to a short label displayed on the video. The convention for visualization is that tracked objects are displayed with their category label (e.g. 'Person', 'Vehicle'), and activities are labeled according to the set of objects performing the activity. When an activity occurs, the set of objects is labeled with the same color as 'Noun Verbing' (e.g. 'Person Entering', 'Person Reading', 'Vehicle Starting'), where 'Verbing' is provided by the shortlabel. This is optional, and the default mapping is used if None
verbose: [bool] Parsing verbosity
merge: [bool] deduplicate annotations for each video across YAML files by merging them by mean spatial IoU per track (>0.5) and temporal IoU (>0)
actor: [bool] Include only those activities that include an associated track for the primary actor: "Person" for "person_*" and "hand_*", else "Vehicle"
disjoint: [bool] Enforce that overlapping causal activities (open/close, enter/exit, ...) are disjoint for a track
unpad: [bool] remove the arbitrary padding assigned during dataset creation
Returns:
a list of `vipy.video.Scene` objects
"""
self.videodir = videodir
self.repodir = repodir
assert os.path.exists(os.path.join(self.videodir, 'drop-01')), "Invalid input - videodir '%s' must contain the drop-01, drop-02 and drop-03 subdirectories. See http://mevadata.org/#getting-data" % videodir
assert os.path.exists(os.path.join(self.repodir, 'annotation')), "Invalid input - repodir '%s' must contain the clone of https://gitlab.kitware.com/meva/meva-data-repo" % repodir
# Shortlabels are optional and used for showing labels on videos only
self._d_category_to_shortlabel = vipy.data.meva.d_category_to_shortlabel
self._d_category_to_shortlabel = {k:v.lower() for (k,v) in self._d_category_to_shortlabel.items()}
self._d_oldcategory_to_newcategory = {k:v for (k,v) in readcsv(os.path.join(self.repodir, 'documents', 'activity-name-mapping.csv'))[1:]}
d_category_to_shortlabel = d_category_to_shortlabel if d_category_to_shortlabel is not None else self._d_category_to_shortlabel
d_videoname_to_path = {filebase(f):f for f in self._get_videos()}
yamlfiles = zip(self._get_types_yaml(), self._get_geom_yaml(), self._get_activities_yaml())
yamlfiles = [y for y in yamlfiles if contrib is True or 'contrib' not in y[0]]
yamlfiles = list(yamlfiles)[0:n_videos] if n_videos is not None else list(yamlfiles)
if withprefix is not None:
yamlfiles = [y for y in yamlfiles if any([(p in y[0]) for p in tolist(withprefix)])]
if verbose:
print('[vipy.data.meva.KF1]: Loading %d YAML files' % len(yamlfiles))
if len(yamlfiles) > 100 and vipy.globals.parallel() <= 1:
print('[vipy.data.meva.KF1]: This takes a while since parsing YAML files in python is painfully slow, consider calling "vipy.globals.parallel(n)" for n>1 before loading the dataset for parallel parsing')
# Parallel video annotation: set vipy.globals.parallel(n) for n parallel workers for the Batch() processing
from vipy.batch import Batch # requires Dask, Distributed
self._vidlist = Batch(list(yamlfiles)).map(lambda tga: self._parse_video(d_videoname_to_path, d_category_to_shortlabel, tga[0], tga[1], tga[2], stride=stride, verbose=verbose, actor=actor)).result()
self._vidlist = [v for v in self._vidlist if v is not None]
# Merge and dedupe activities and tracks across YAML files for same video, using temporal and spatial IoU association.
# The MEVA dataset is "activity-centric" so that each activity is labeled independently. There may be tracks in the dataset
# that are the same instance in the video, but are different track IDs in the dataset. The result is disjoint activity labels
# in a non-disjoint activity in a video. Yuck. Try to merge them. This is experimental, since it tries to use IoU for merging,
# which does not work in general. This requires global track correspondence.
if merge:
print('[vipy.data.meva.KF1]: merging videos ...')
V = list(groupbyasdict([a for vid in self._vidlist for a in vid.activitysplit()], lambda s: s.filename()).values())
self._vidlist = Batch(V).map(lambda v: v[0].clone().union(v[1:])).result()
# Enforce disjoint causal activities
# Due to the arbitrary temporal padding in the annotation definitions, merged causal activities can overlap
# Enforce that causal activities (open/close, enter/exit, pickup/putdown, load/unload) for the same track are disjoint
if disjoint:
V = self._vidlist
V = [v.activitymap(lambda a: a.disjoint([sa for sa in v.activitylist() if sa.category() == 'person_closes_vehicle_door']) if a.category() == 'person_opens_vehicle_door' else a) for v in V]
V = [v.activitymap(lambda a: a.disjoint([sa for sa in v.activitylist() if sa.category() == 'person_closes_vehicle_trunk']) if a.category() == 'person_opens_vehicle_trunk' else a) for v in V]
V = [v.activitymap(lambda a: a.disjoint([sa for sa in v.activitylist() if sa.category() == 'person_exits_vehicle']) if a.category() == 'person_enters_vehicle' else a) for v in V]
V = [v.activitymap(lambda a: a.disjoint([sa for sa in v.activitylist() if sa.category() == 'person_enters_scene_through_structure']) if a.category() == 'person_exits_scene_through_structure' else a) for v in V]
V = [v.activitymap(lambda a: a.disjoint([sa for sa in v.activitylist() if sa.category() == 'person_closes_facility_door']) if a.category() == 'person_opens_facility_door' else a) for v in V]
V = [v.activitymap(lambda a: a.disjoint([sa for sa in v.activitylist() if sa.category() == 'person_unloads_vehicle']) if a.category() == 'person_loads_vehicle' else a) for v in V]
V = [v.activitymap(lambda a: a.disjoint([sa for sa in v.activitylist() if sa.category() == 'person_puts_down_object']) if a.category() == 'person_picks_up_object' else a) for v in V]
V = [v.activityfilter(lambda a: len(a)>0) for v in V] # some activities may be zero length after disjoint
self._vidlist = V
# Remove the arbitrary temporal padding applied during dataset creation
if unpad:
# MEVA annotations assumptions: https://docs.google.com/spreadsheets/d/19I3C5Zb6RHS0QC30nFT_m0ymArzjvlPLfb5SSRQYLUQ/edit#gid=0
# Pad one second before, zero seconds after
before1after0 = set(['person_opens_facility_door', 'person_closes_facility_door', 'person_opens_vehicle_door', 'person_closes_vehicle_door',
'person_opens_trunk', 'person_closes_trunk', 'vehicle_stops', 'person_interacts_with_laptop'])
V = [v.activitymap(lambda a: a.temporalpad( (-v.framerate()*1.0, 0) ) if a.category() in before1after0 else a) for v in self._vidlist]
# pad one second before, one second after, up to maximum of two seconds
before1after1max2 = set(['person_enters_scene_through_structure'])
V = [v.activitymap(lambda a: a.temporalpad(-v.framerate()*1.0) if a.category() in before1after1max2 else a) for v in V]
# person_exits_scene_through_structure: Pad one second before person_opens_facility_door label (if door collection), and ends with enough padding to make this minimum two seconds
V = [v.activitymap(lambda a: a.temporalpad( (-v.framerate()*1.0, 0) ) if a.category() == 'person_exits_scene_through_structure' else a) for v in V]
# person_enters_vehicle: Starts one second before person_opens_vehicle_door activity label and ends at the end of person_closes_vehicle_door activity
V = [v.activitymap(lambda a: a.temporalpad( (-v.framerate()*1.0, 0) ) if a.category() == 'person_enters_vehicle' else a) for v in V]
# person_exits_vehicle: Starts one second before person_opens_vehicle_door, and ends at person_exits_vehicle with enough padding to make this minimum two seconds
V = [v.activitymap(lambda a: a.temporalpad( (-v.framerate()*1.0, 0) ) if a.category() == 'person_exits_vehicle' else a) for v in V]
# person_unloads_vehicle: one second of padding before cargo starts to move
V = [v.activitymap(lambda a: a.temporalpad( (-v.framerate()*1.0, 0) ) if a.category() == 'person_unloads_vehicle' else a) for v in V]
# person_talks_to_person: Equal padding to minimum of five seconds
# person_texting_on_phone: Equal padding to minimum of two seconds
# Pad one second before, one second after
before1after1 = set(['vehicle_turns_left', 'vehicle_turns_right', 'person_transfers_object',
'person_sets_down_object', 'hand_interacts_with_person', 'person_embraces_person', 'person_purchases',
'vehicle_picks_up_person', 'vehicle_drops_off_person'])
V = [v.activitymap(lambda a: a.temporalpad(-v.framerate()*1.0) if a.category() in before1after1 else a) for v in V]
# Pad zero seconds before, one second after
before0after1 = set(['vehicle_makes_u_turn', 'person_picks_up_object'])
V = [v.activitymap(lambda a: a.temporalpad( (0, -v.framerate()*1.0) ) if a.category() in before0after1 else a) for v in V]
# person_abandons_package: two seconds before, two seconds after
V = [v.activitymap(lambda a: a.temporalpad(-v.framerate()*2.0) if a.category() == 'person_abandons_package' else a) for v in V]
self._vidlist = V
# Remove empty tracks and activities
self._vidlist = [v.trackfilter(lambda t: len(t) > 0) for v in self._vidlist]
self._vidlist = [v.activityfilter(lambda a: len(a) > 0) for v in self._vidlist]
def __getitem__(self, k):
return self._vidlist[k]
def __iter__(self):
for v in self._vidlist:
yield v
def __len__(self):
return len(self._vidlist)
def __repr__(self):
return str('<vipy.data.meva.KF1: videos=%d, videodir="%s", annotationdir="%s">' % (len(self), self.videodir, self.repodir))
def _activities_to_required_objects(self):
"""Return a dictionary of activity keys to set of required objects. This is currently wrong."""
raise ValueError('This mapping is currently wrong in the Kitware repository')
d = readjson(os.path.join(self.repodir, 'annotation', 'DIVA-phase-2', 'activity-index.json'))
return {a:set([x.replace('Construction_Vehicle', 'Vehicle') for x in d[a]['objectTypes']]) for a in self.activities()}
def _get_activities_yaml(self):
return sorted([x for x in findyaml(self.repodir) if 'activities.yml' in x])
def _get_geom_yaml(self):
return sorted([x for x in findyaml(self.repodir) if 'geom.yml' in x])
def _get_types_yaml(self):
return sorted([x for x in findyaml(self.repodir) if 'types.yml' in x])
def _get_fileindex_json(self):
return sorted([x for x in findjson(self.repodir) if 'file-index.json' in x])
def _get_activities_json(self):
return sorted([x for x in findjson(self.repodir) if 'activities.json' in x])
def _get_videos(self):
return sorted([x for x in findvideo(self.videodir)])
def _parse_video(self, d_videoname_to_path, d_category_to_shortlabel, types_yamlfile, geom_yamlfile, act_yamlfile, stride=1, verbose=False, actor=False):
"""Reference: https://gitlab.kitware.com/meva/meva-data-repo/-/blob/master/documents/KPF-specification-v4.pdf"""
# Read YAML
if verbose:
print('[vipy.data.meva.KF1]: Parsing "%s"' % (act_yamlfile))
geom_yaml = readyaml(geom_yamlfile)
types_yaml = readyaml(types_yamlfile)
act_yaml = readyaml(act_yamlfile)
# Sanity check
assert act_yamlfile.split('.')[:-2] == geom_yamlfile.split('.')[:-2], "Unmatched activity and geom yaml file"
assert 'meta' not in types_yaml[0] or len(set([types_yaml[0]['meta'], geom_yaml[0]['meta'], act_yaml[0]['meta']]))==1, "Mismatched video name for '%s'" % act_yamlfile
try:
videoname = act_yaml[0]['meta'] if act_yaml[0]['meta'][-4:] != '.avi' else act_yaml[0]['meta'][0:-4] # strip .avi
except:
videoname = vipy.util.filetail(act_yamlfile.split('activities.yml')[0][0:-1]) # /path/to/filebase-activities.yml or /path/to/filebase.activities.yml [YUCK, necessary for newer drops in Spring 2021]
if videoname not in d_videoname_to_path:
if verbose:
print('[vipy.data.meva.KF1]: Invalid MEVA video "%s" in "%s" - Ignoring' % (videoname, filebase(act_yamlfile)))
return None
# Parse video
framerate = 30.0 # All videos are universally 30Hz (from Roddy)
vid = Scene(filename=d_videoname_to_path[videoname], framerate=framerate)
# Parse tracks
d_id1_to_category = {}
for t in types_yaml:
if 'types' in t:
d_id1_to_category[t['types']['id1']] = list(t['types']['cset3'].keys())[0]
d_id1_to_track = {}
d_geom_yaml = groupbyasdict([x['geom'] for x in geom_yaml if 'geom' in x], lambda v: v['id1'])
assert stride >= 1, "Invalid stride"
for (id1, geom_yaml) in d_geom_yaml.items():
geom_yaml = sorted(geom_yaml, key=lambda x: int(x['ts0'])) # increasing
for (k_geom, v) in enumerate(geom_yaml):
if stride > 1 and k_geom > 0 and (k_geom < (len(geom_yaml)-stride)) and (k_geom % stride != 0):
continue # Use vipy track interpolation to speed up parsing
keyframe = int(v['ts0'])
bb = [int(x) for x in v['g0'].split(' ')]
bbox = BoundingBox(xmin=bb[0], ymin=bb[1], xmax=bb[2], ymax=bb[3])
if not bbox.isvalid():
if verbose:
print('[vipy.data.meva.KF1]: Invalid bounding box: id1=%s, bbox="%s", file="%s" - Ignoring' % (str(v['id1']), str(bbox), delpath(self.repodir, geom_yamlfile)))
elif v['id1'] not in d_id1_to_track:
d_id1_to_track[v['id1']] = Track(category=d_id1_to_category[v['id1']], framerate=framerate, keyframes=[keyframe], boxes=[bbox], boundary='strict')
else:
d_id1_to_track[v['id1']].add(keyframe=keyframe, bbox=bbox)
# Add tracks to scene
for (k,v) in d_id1_to_track.items():
try:
vid.add(v, rangecheck=True) # throw exception if all tracks are outside the image rectangle
except Exception as e:
print('[vipy.data.meva.KF1]: track import error "%s" for trackid=%s, track=%s - SKIPPING' % (str(e), k, str(v)))
# Category to actor: This defines the primary role for the activity (for tube based representations)
f_activity_to_actor = lambda c: 'Person' if (c.split('_')[0] == 'person' or 'hand' in c) else 'Vehicle'
# Parse activities
for v in act_yaml:
if 'act' in v:
if 'act2' in v['act']:
act2 = v['act']['act2']
if isinstance(act2, set):
category = list(act2)[0]
v['act']['act2'] = list(act2) # for JSON serialization
elif isinstance(act2, dict):
category = list(act2.keys())[0]
else:
raise ValueError('YAML parsing error for "%s"' % str(act2))
elif 'act3' in v['act']:
act3 = v['act']['act3']
if isinstance(act3, set):
category = list(act3)[0]
v['act']['act3'] = list(act3) # for JSON serialization
elif isinstance(act3, dict):
category = list(act3.keys())[0]
else:
raise ValueError('YAML parsing error for "%s"' % str(act3))
else:
raise ValueError('Invalid activity YAML - act2 or act3 must be specified')
assert len(v['act']['timespan']) == 1, "Multi-span activities not parsed"
if category not in self.categories():
if category in self._d_oldcategory_to_newcategory:
category = self._d_oldcategory_to_newcategory[category] # rationalize
else:
raise ValueError('undefined category "%s"' % category)
startframe = int(v['act']['timespan'][0]['tsr0'][0])
endframe = int(v['act']['timespan'][0]['tsr0'][1])
actorid = [x['id1'] for x in v['act']['actors']]
if True:
nounid = [d_id1_to_track[a].id() for a in actorid if (a in d_id1_to_track) and (f_activity_to_actor(category).lower() == d_id1_to_track[a].category().lower())]
if len(nounid) == 0 and actor:
print('[vipy.data.meva.KF1]: Warning - activity "%s" without a required primary actor "%s" - SKIPPING' % (category, f_activity_to_actor(category)))
continue # skip it
elif len(nounid) == 0:
print('[vipy.data.meva.KF1]: Warning - activity "%s" without a required primary actor "%s"' % (category, f_activity_to_actor(category)))
nounid = nounid[0] if len(nounid) > 0 else None # first track in activity of required object class for this category is assumed to be the performer/actor/noun
for aid in actorid:
if not aid in d_id1_to_track:
print('[vipy.data.meva.KF1]: ActorID %d referenced in activity yaml "%s" not found in geom yaml "%s" - Skipping' % (aid, delpath(self.repodir, act_yamlfile), delpath(self.repodir, geom_yamlfile)))
# Add activity to scene: include YAML file details in activity attributes for provenance if there are labeling bugs
tracks = {d_id1_to_track[aid].id():d_id1_to_track[aid] for aid in actorid if aid in d_id1_to_track} # order preserving (python 3.6)
if len(tracks) > 0:
try:
vid.add(Activity(category=category, shortlabel=d_category_to_shortlabel[category], actorid=nounid if actor else None,
startframe=startframe, endframe=endframe, tracks=tracks, framerate=framerate,
attributes={'act':str(v['act']), 'act_yaml':act_yamlfile, 'geom_yaml':geom_yamlfile}), rangecheck=True)
except Exception as e:
print('[vipy.data.meva.KF1]: activity import error "%s" for activity="%s" - SKIPPING' % (str(e), str(v)))
return vid
def videos(self):
"""Return list of activity videos"""
return [v for v in self._vidlist if v is not None]
def tolist(self):
return self.videos()
def instances(self, padframes=0):
"""Return list of activity instances"""
if vipy.globals.max_workers() > 1:
return [a for A in Batch(self.videos()).activityclip(padframes=padframes).result() for a in A]
else:
warnings.warn('Consider setting vipy.globals.max_workers(n) for n>1 to speed this up')
return [a for v in self.videos() for a in v.activityclip(padframes=padframes)]
def categories(self):
"""Return a list of activity categories"""
return sorted(list(self._d_category_to_shortlabel.keys()))
def analysis(self, outdir=None):
"""Analyze the MEVA dataset to return helpful statistics and plots"""
import matplotlib.pyplot as plt
import vipy.metrics
videos = self._vidlist
scenes = flatlist([m.activityclip() for m in videos if m is not None])
activities = flatlist([s.activities().values() for s in scenes])
tracks = flatlist([s.tracks().values() for s in scenes])
outdir = tempdir() if outdir is None else outdir
# Category distributions
d = {}
d['activity_categories'] = set([a.category() for a in activities])
d['object_categories'] = set([t.category() for t in tracks])
d['videos'] = set([v.filename() for v in videos if v is not None])
d['num_activities'] = sorted([(k,len(v)) for (k,v) in groupbyasdict(activities, lambda a: a.category()).items()], key=lambda x: x[1])
d['video_density'] = sorted([(v.filename(),len(v.activities())) for v in videos if v is not None], key=lambda x: x[1])
# Histogram of instances
(categories, freq) = zip(*reversed(d['num_activities']))
barcolors = ['blue' if not 'vehicle' in c else 'green' for c in categories]
d['num_activities_histogram'] = vipy.metrics.histogram(freq, categories, barcolors=barcolors, outfile=os.path.join(outdir, 'num_activities_histogram.pdf'), ylabel='Instances')
colors = colorlist()
# Scatterplot of people and vehicles box sizes
(x, y) = zip(*[(t.meanshape()[1], t.meanshape()[0]) for t in tracks])
plt.clf()
plt.figure()
plt.grid(True)
d_category_to_color = {'person':'blue', 'vehicle':'green'}
for c in ['person', 'vehicle']:
(xc, yc) = zip(*[(t.meanshape()[1], t.meanshape()[0]) for t in tracks if t.category() == c])
plt.scatter(xc, yc, c=d_category_to_color[c], label=c)
plt.xlabel('bounding box (width)')
plt.ylabel('bounding box (height)')
plt.axis([0, 1000, 0, 1000])
plt.legend()
plt.gca().set_axisbelow(True)
d['object_bounding_box_scatterplot'] = os.path.join(outdir, 'object_bounding_box_scatterplot.pdf')
plt.savefig(d['object_bounding_box_scatterplot'])
# 2D histogram of people and vehicles box sizes
for c in ['person', 'vehicle']:
(xc, yc) = zip(*[(t.meanshape()[1], t.meanshape()[0]) for t in tracks if t.category() == c])
plt.clf()
plt.figure()
plt.hist2d(xc, yc, bins=10)
plt.xlabel('Bounding box (width)')
plt.ylabel('Bounding box (height)')
d['2D_%s_bounding_box_histogram' % c] = os.path.join(outdir, '2D_%s_bounding_box_histogram.pdf' % c)
plt.savefig(d['2D_%s_bounding_box_histogram' % c])
# Mean track size per activity category
d_category_to_xy = {k:np.mean([t.meanshape() for v in vlist for t in v.tracklist()], axis=0) for (k,vlist) in groupbyasdict(scenes, lambda v: v.category()).items()}
plt.clf()
plt.figure()
plt.grid(True)
d_category_to_color = {c:colors[k % len(colors)] for (k,c) in enumerate(d_category_to_xy.keys())}
for c in d_category_to_xy.keys():
(xc, yc) = d_category_to_xy[c]
plt.scatter(xc, yc, c=d_category_to_color[c], label=c)
plt.xlabel('bounding box (width)')
plt.ylabel('bounding box (height)')
plt.axis([0, 600, 0, 600])
plt.gca().set_axisbelow(True)
lgd = plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
d['activity_bounding_box_scatterplot'] = os.path.join(outdir, 'activity_bounding_box_scatterplot.pdf')
plt.savefig(d['activity_bounding_box_scatterplot'], bbox_extra_artists=(lgd,), bbox_inches='tight')
return d
def review(self, outfile=None, mindim=512):
"""Generate a standalone HTML file containing quicklooks for each annotated activity in dataset, along with some helpful provenance information for where the annotation came from"""
if vipy.globals.max_workers() == 1:
warnings.warn("Generating review HTML is very time consuming, consider setting vipy.global.max_workers(n) for n > 1 for parallel video processing")
quicklist = Batch(self._vidlist).map(lambda v: [(c.load().quicklook(context=True), c.flush()) for c in v.mindim(512).activityclip()]).result()
quicklooks = [imq for q in quicklist for (imq, c) in q] # for HTML display purposes
provenance = [{'clip':str(c), 'activities':str(';'.join([str(a) for a in c.activitylist()])), 'category':c.category(), 'yamlfile':c.activitylist()[0].attributes['act_yaml']} for q in quicklist for (imq, c) in q]
(quicklooks, provenance) = zip(*sorted([(q,p) for (q,p) in zip(quicklooks, provenance)], key=lambda x: x[1]['category'])) # sorted in category order
return vipy.visualize.tohtml(quicklooks, provenance, title='MEVA-KF1 annotation quicklooks', outfile=outfile, mindim=mindim)
Classes
class KF1 (videodir, repodir, contrib=False, stride=1, verbose=True, n_videos=None, withprefix=None, d_category_to_shortlabel=None, merge=False, actor=False, disjoint=False, unpad=False)
-
Parse MEVA annotations (http://mevadata.org) for the Known Facility 1 dataset into vipy.video.Scene() objects
Kwiver packet format: https://gitlab.kitware.com/meva/meva-data-repo/blob/master/documents/KPF-specification-v4.pdf
Args
videodir
- [str] path to the directory containing 'drop-01'
repodir
- [str] path to directory containing clone of https://gitlab.kitware.com/meva/meva-data-repo
stride
- [int] the temporal stride in frames for importing bounding boxes; vipy will perform linear interpolation and boundary handling between keyframes
n_videos
- [int] only return the first n_videos videos, useful for debugging or for previewing the dataset
withprefix
- [list] only return videos whose filename contains one of the strings in the withprefix list, useful for debugging
contrib
- [bool] include the noisy contrib annotations from DIVA performers
d_category_to_shortlabel
- [dict] a dictionary mapping category names to a short label displayed on the video. The convention for visualization is that tracked objects are displayed with their category label (e.g. 'Person', 'Vehicle'), and activities are labeled according to the set of objects performing the activity. When an activity occurs, the set of objects is labeled with the same color as 'Noun Verbing' (e.g. 'Person Entering', 'Person Reading', 'Vehicle Starting'), where 'Verbing' is provided by the shortlabel. This is optional, and the default mapping is used if None
verbose
- [bool] Parsing verbosity
merge
- [bool] deduplicate annotations for each video across YAML files by merging them by mean spatial IoU per track (>0.5) and temporal IoU (>0)
actor
- [bool] Include only those activities that include an associated track for the primary actor: "Person" for "person_*" and "hand_*", else "Vehicle"
disjoint
- [bool] Enforce that overlapping causal activities (open/close, enter/exit, ...) are disjoint for a track
unpad
- [bool] remove the arbitrary padding assigned during dataset creation
Returns
a list of vipy.video.Scene objects
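A minimal usage sketch (the paths below are hypothetical placeholders for a local MEVA video download and a clone of the annotation repository):
import vipy.globals
from vipy.data.meva import KF1
videodir = '/path/to/meva'           # hypothetical: must contain the drop-01, drop-02, drop-03 subdirectories
repodir = '/path/to/meva-data-repo'  # hypothetical: clone of https://gitlab.kitware.com/meva/meva-data-repo
vipy.globals.parallel(4)             # optional: parse the YAML annotation files with four parallel workers
kf1 = KF1(videodir, repodir, merge=True, actor=True)
print(len(kf1))                      # number of parsed videos
v = kf1[0]                           # a vipy.video.Scene with tracks and activities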
Methods
def analysis(self, outdir=None)
-
Analyze the MEVA dataset to return helpful statistics and plots
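For example, a sketch (the output directory is a hypothetical placeholder; the dictionary keys follow from the source above):
stats = kf1.analysis(outdir='/path/to/analysis')  # hypothetical output directory for the generated PDF plots
print(stats['num_activities'])                    # [(category, count), ...] sorted by instance count
print(stats['num_activities_histogram'])          # path to the saved instance histogram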
def categories(self)
-
Return a list of activity categories
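For instance, the returned entries are the sorted keys of the module-level d_category_to_shortlabel mapping:
cats = kf1.categories()
print(cats[:3])  # ['hand_interacts_with_person', 'person_abandons_package', 'person_carries_heavy_object']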
def instances(self, padframes=0)
-
Return list of activity instances
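A sketch of the parallel path suggested by the method's warning (assuming vipy.globals.max_workers(n) is settable, as that warning implies):
vipy.globals.max_workers(4)          # assumption: enables the parallel Batch path
clips = kf1.instances(padframes=30)  # one vipy.video.Scene clip per labeled activity, padded by 30 frames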
def review(self, outfile=None, mindim=512)
-
Generate a standalone HTML file containing quicklooks for each annotated activity in dataset, along with some helpful provenance information for where the annotation came from
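For example (the output path is a hypothetical placeholder):
html = kf1.review(outfile='/path/to/review.html')  # writes a standalone HTML file of activity quicklooks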
def tolist(self)
-
Alias for videos(); returns the list of parsed activity videos
def videos(self)
-
Return list of activity videos