Module vipy.data.charades
Expand source code Browse git
import os
from vipy.util import filetail, remkdir, readjson, groupbyasdict, filefull, readlist, readcsv
from vipy.video import VideoCategory, Video, Scene
from vipy.object import Track, BoundingBox
from vipy.activity import Activity
URL_ANNOTATIONS = 'http://ai2-website.s3.amazonaws.com/data/Charades.zip'
URL_DATA = 'http://ai2-website.s3.amazonaws.com/data/Charades_v1.zip'
class Charades(object):
def __init__(self, datadir, annodir):
"""Charades, provide paths such that datadir contains the contents of 'http://ai2-website.s3.amazonaws.com/data/Charades_v1.zip' and annodir contains 'http://ai2-website.s3.amazonaws.com/data/Charades.zip'.
These are license restructed and must be downloaded by the user"""
self.datadir = datadir
self.annodir = annodir
if not self._isdownloaded():
raise ValueError('Not downloaded')
def __repr__(self):
return str('<vipy.data.charades: datadir="%s", annotations="%s">' % (self.datadir, self.annodir))
def _isdownloaded(self):
return os.path.exists(os.path.join(self.annodir, 'Charades_v1_train.csv')) and os.path.exists(os.path.join(self.datadir, 'ZZXQF.mp4'))
def _dataset(self, csvfile):
csv = readcsv(csvfile)
d_index_to_category = self.categories()
d_videoid_to_video = {}
for row in csv[1:]:
videoid = row[0]
actions = row[-2]
sceneloc = row[2]
if videoid not in d_videoid_to_video:
v = Scene(filename=os.path.join(self.datadir, '%s.mp4' % videoid), category=sceneloc)
fps = v.probe()['streams'][0]['avg_frame_rate']
fps = float(fps.split('/')[0]) / float(fps.split('/')[1])
v.framerate(fps) # FIXME: better handling of time based clips to avoid ffprobe
d_videoid_to_video[videoid] = v
assert d_videoid_to_video[videoid].category() == sceneloc
if len(actions) > 0:
for a in actions.split(';'):
(category, startsec, endsec) = a.split(' ')
try:
d_videoid_to_video[videoid].add(Activity(category=d_index_to_category[category], startframe=float(startsec)*fps, endframe=float(endsec)*fps, attributes={'csvfile':row}))
except KeyboardInterrupt:
raise
except Exception as e:
print('[vipy.data.charades]: SKIPPING invalid activity row="%s" with error "%s"' % (str(row),str(e)))
return list(d_videoid_to_video.values())
def categories(self):
return {x.split(' ', 1)[0]:x.split(' ', 1)[1].strip() for x in readlist(os.path.join(self.annodir, 'Charades_v1_classes.txt'))}
def trainset(self):
return self._dataset(os.path.join(self.annodir, 'Charades_v1_train.csv'))
def testset(self):
return self._dataset(os.path.join(self.annodir, 'Charades_v1_test.csv'))
def review(self, outfile=None, mindim=1024, n=25):
"""Generate a standalone HTML file containing quicklooks for each annotated activity in the train set"""
from vipy.batch import Batch
import vipy.visualize
T = self.trainset()
quicklist = Batch(T).map(lambda v: [(c.load().quicklook(n=n), c.activitylist(), str(c.flush().print())) for c in v.mindim(512).activityclip()]).result()
quicklooks = [imq for q in quicklist for (imq, activitylist, description) in q] # for HTML display purposes
provenance = [{'clip':str(description), 'activity':str(a), 'category':a.category(), 'train.csv':a.attributes['csvfile']} for q in quicklist for (imq, activitylist, description) in q for a in activitylist]
(quicklooks, provenance) = zip(*sorted([(q,p) for (q,p) in zip(quicklooks, provenance)], key=lambda x: x[1]['category'])) # sorted in category order
return vipy.visualize.tohtml(quicklooks, provenance, title='Charades trainset quicklooks', outfile=outfile, mindim=mindim)
Classes
class Charades (datadir, annodir)
-
Charades, provide paths such that datadir contains the contents of 'http://ai2-website.s3.amazonaws.com/data/Charades_v1.zip' and annodir contains 'http://ai2-website.s3.amazonaws.com/data/Charades.zip'.
These are license restructed and must be downloaded by the userExpand source code Browse git
class Charades(object): def __init__(self, datadir, annodir): """Charades, provide paths such that datadir contains the contents of 'http://ai2-website.s3.amazonaws.com/data/Charades_v1.zip' and annodir contains 'http://ai2-website.s3.amazonaws.com/data/Charades.zip'. These are license restructed and must be downloaded by the user""" self.datadir = datadir self.annodir = annodir if not self._isdownloaded(): raise ValueError('Not downloaded') def __repr__(self): return str('<vipy.data.charades: datadir="%s", annotations="%s">' % (self.datadir, self.annodir)) def _isdownloaded(self): return os.path.exists(os.path.join(self.annodir, 'Charades_v1_train.csv')) and os.path.exists(os.path.join(self.datadir, 'ZZXQF.mp4')) def _dataset(self, csvfile): csv = readcsv(csvfile) d_index_to_category = self.categories() d_videoid_to_video = {} for row in csv[1:]: videoid = row[0] actions = row[-2] sceneloc = row[2] if videoid not in d_videoid_to_video: v = Scene(filename=os.path.join(self.datadir, '%s.mp4' % videoid), category=sceneloc) fps = v.probe()['streams'][0]['avg_frame_rate'] fps = float(fps.split('/')[0]) / float(fps.split('/')[1]) v.framerate(fps) # FIXME: better handling of time based clips to avoid ffprobe d_videoid_to_video[videoid] = v assert d_videoid_to_video[videoid].category() == sceneloc if len(actions) > 0: for a in actions.split(';'): (category, startsec, endsec) = a.split(' ') try: d_videoid_to_video[videoid].add(Activity(category=d_index_to_category[category], startframe=float(startsec)*fps, endframe=float(endsec)*fps, attributes={'csvfile':row})) except KeyboardInterrupt: raise except Exception as e: print('[vipy.data.charades]: SKIPPING invalid activity row="%s" with error "%s"' % (str(row),str(e))) return list(d_videoid_to_video.values()) def categories(self): return {x.split(' ', 1)[0]:x.split(' ', 1)[1].strip() for x in readlist(os.path.join(self.annodir, 'Charades_v1_classes.txt'))} def trainset(self): return self._dataset(os.path.join(self.annodir, 'Charades_v1_train.csv')) def testset(self): return self._dataset(os.path.join(self.annodir, 'Charades_v1_test.csv')) def review(self, outfile=None, mindim=1024, n=25): """Generate a standalone HTML file containing quicklooks for each annotated activity in the train set""" from vipy.batch import Batch import vipy.visualize T = self.trainset() quicklist = Batch(T).map(lambda v: [(c.load().quicklook(n=n), c.activitylist(), str(c.flush().print())) for c in v.mindim(512).activityclip()]).result() quicklooks = [imq for q in quicklist for (imq, activitylist, description) in q] # for HTML display purposes provenance = [{'clip':str(description), 'activity':str(a), 'category':a.category(), 'train.csv':a.attributes['csvfile']} for q in quicklist for (imq, activitylist, description) in q for a in activitylist] (quicklooks, provenance) = zip(*sorted([(q,p) for (q,p) in zip(quicklooks, provenance)], key=lambda x: x[1]['category'])) # sorted in category order return vipy.visualize.tohtml(quicklooks, provenance, title='Charades trainset quicklooks', outfile=outfile, mindim=mindim)
Methods
def categories(self)
-
Expand source code Browse git
def categories(self): return {x.split(' ', 1)[0]:x.split(' ', 1)[1].strip() for x in readlist(os.path.join(self.annodir, 'Charades_v1_classes.txt'))}
def review(self, outfile=None, mindim=1024, n=25)
-
Generate a standalone HTML file containing quicklooks for each annotated activity in the train set
Expand source code Browse git
def review(self, outfile=None, mindim=1024, n=25): """Generate a standalone HTML file containing quicklooks for each annotated activity in the train set""" from vipy.batch import Batch import vipy.visualize T = self.trainset() quicklist = Batch(T).map(lambda v: [(c.load().quicklook(n=n), c.activitylist(), str(c.flush().print())) for c in v.mindim(512).activityclip()]).result() quicklooks = [imq for q in quicklist for (imq, activitylist, description) in q] # for HTML display purposes provenance = [{'clip':str(description), 'activity':str(a), 'category':a.category(), 'train.csv':a.attributes['csvfile']} for q in quicklist for (imq, activitylist, description) in q for a in activitylist] (quicklooks, provenance) = zip(*sorted([(q,p) for (q,p) in zip(quicklooks, provenance)], key=lambda x: x[1]['category'])) # sorted in category order return vipy.visualize.tohtml(quicklooks, provenance, title='Charades trainset quicklooks', outfile=outfile, mindim=mindim)
def testset(self)
-
Expand source code Browse git
def testset(self): return self._dataset(os.path.join(self.annodir, 'Charades_v1_test.csv'))
def trainset(self)
-
Expand source code Browse git
def trainset(self): return self._dataset(os.path.join(self.annodir, 'Charades_v1_train.csv'))