Module vipy.data.kinetics

Expand source code Browse git
import os
from vipy.util import remkdir, readjson, groupbyasdict
import vipy.downloader
from vipy.video import VideoCategory
import numpy as np


class Kinetics700(object):
    """Accessor for the Kinetics-700 annotation tarball.

    The tarball at ``self._url`` is expected to unpack into
    ``<datadir>/<self._name>/`` containing ``train.json``, ``test.json`` and
    ``validate.json``.  Subclasses only need to set ``datadir``, ``_url`` and
    ``_name``; every method below derives its paths from those attributes.
    """

    def __init__(self, datadir):
        """Kinetics, provide a datadir='/path/to/store/kinetics'

        Stores ``remkdir(datadir)`` as ``self.datadir`` and calls ``download()``
        when ``isdownloaded()`` reports that nothing is present yet.
        """
        self.datadir = remkdir(datadir)
        self._url = 'https://storage.googleapis.com/deepmind-media/Datasets/kinetics700.tar.gz'
        self._name = 'kinetics700'
        # The check is the public isdownloaded(); this class defines no
        # _isdownloaded(), so calling that name raised AttributeError.
        if not self.isdownloaded():
            self.download()

    def __repr__(self):
        """Return '<vipy.data.NAME: "DATADIR/NAME">'."""
        return str('<vipy.data.%s: "%s/%s">' % (self._name, self.datadir, self._name))

    def download(self, verbose=True):
        """Fetch and unpack ``self._url`` into ``self.datadir``; returns self for chaining."""
        vipy.downloader.download_and_unpack(self._url, self.datadir, verbose=verbose)
        return self

    def isdownloaded(self):
        """Return True if this dataset's tarball or its unpacked train.json exists in datadir.

        The tarball name is derived from ``self._name`` so that subclasses
        (kinetics600, kinetics400) check for their own archive rather than
        the kinetics700 one.
        """
        tarball = os.path.join(self.datadir, '%s.tar.gz' % self._name)
        trainjson = os.path.join(self.datadir, self._name, 'train.json')
        return os.path.exists(tarball) or os.path.exists(trainjson)

    def _dataset(self, jsonfile):
        """Load one split file into a list of VideoCategory objects.

        Args:
            jsonfile: path to a JSON file mapping a youtube id to a dict with
                keys 'url' and 'annotations'; 'annotations' carries 'label'
                and 'segment' (a [start, end] pair, converted to float seconds).

        Returns:
            One VideoCategory per entry, with filename
            ``<datadir>/<name>/<youtubeid>``.

        Raises:
            AssertionError: if isdownloaded() is False.
        """
        assert self.isdownloaded(), "Dataset not downloaded.  download() first or manually download '%s' to '%s' and unpack the tarball there" % (self._url, self.datadir)
        return [VideoCategory(url=v['url'],
                              filename=os.path.join(self.datadir, self._name, youtubeid),
                              category=v['annotations']['label'],
                              startsec=float(v['annotations']['segment'][0]),
                              endsec=float(v['annotations']['segment'][1]))
                for (youtubeid, v) in readjson(jsonfile).items()]

    def trainset(self):
        """Return the list of VideoCategory objects described by train.json."""
        return self._dataset(os.path.join(self.datadir, self._name, 'train.json'))

    def testset(self):
        """Return the list of VideoCategory objects described by test.json."""
        return self._dataset(os.path.join(self.datadir, self._name, 'test.json'))

    def valset(self):
        """Return the list of VideoCategory objects described by validate.json."""
        return self._dataset(os.path.join(self.datadir, self._name, 'validate.json'))

    def categories(self):
        """Return the set of distinct annotation labels found in train.json."""
        jsonfile = os.path.join(self.datadir, self._name, 'train.json')
        return {v['annotations']['label'] for v in readjson(jsonfile).values()}

    def analysis(self):
        """Print the 10 largest and 10 smallest training categories and return all counts.

        Returns:
            dict mapping category -> number of training instances.
        """
        d_category_to_trainsize = {k: len(v) for (k, v) in groupbyasdict(self.trainset(), lambda x: x.category()).items()}

        # Sort once, ascending by count; both slices below come from this one
        # ordering, so the largest category is the last element of top10.
        by_count = sorted(d_category_to_trainsize.items(), key=lambda x: x[1])

        top10 = by_count[-10:]
        print('top10 categories by number of instances in training set:')
        print(top10)

        bottom10 = by_count[0:10]
        print('bottom-10 categories by number of instances in training set:')
        print(bottom10)

        return d_category_to_trainsize

        
class Kinetics600(Kinetics700):
    """Kinetics-600 variant; inherits every accessor from Kinetics700."""

    def __init__(self, datadir):
        """Kinetics, provide a datadir='/path/to/store/kinetics'

        Only records where the data lives: the result of remkdir(datadir),
        the tarball URL and the dataset name.  This constructor neither
        checks for nor triggers a download.
        """
        root = remkdir(datadir)
        self.datadir = root
        self._name = 'kinetics600'
        self._url = 'https://storage.googleapis.com/deepmind-media/Datasets/kinetics600.tar.gz'


class Kinetics400(Kinetics700):
    """Kinetics-400 variant; inherits every accessor from Kinetics700."""

    def __init__(self, datadir):
        """Kinetics, provide a datadir='/path/to/store/kinetics'

        Only records where the data lives: the result of remkdir(datadir),
        the tarball URL and the dataset name.  This constructor neither
        checks for nor triggers a download.
        """
        root = remkdir(datadir)
        self.datadir = root
        self._name = 'kinetics400'
        self._url = 'https://storage.googleapis.com/deepmind-media/Datasets/kinetics400.tar.gz'

Classes

class Kinetics400 (datadir)

Kinetics, provide a datadir='/path/to/store/kinetics'

Expand source code Browse git
class Kinetics400(Kinetics700):
    """Kinetics-400 variant; inherits every accessor from Kinetics700."""

    def __init__(self, datadir):
        """Kinetics, provide a datadir='/path/to/store/kinetics'

        Only records where the data lives: the result of remkdir(datadir),
        the tarball URL and the dataset name.  This constructor neither
        checks for nor triggers a download.
        """
        root = remkdir(datadir)
        self.datadir = root
        self._name = 'kinetics400'
        self._url = 'https://storage.googleapis.com/deepmind-media/Datasets/kinetics400.tar.gz'

Ancestors

vipy.data.kinetics.Kinetics700

class Kinetics600 (datadir)

Kinetics, provide a datadir='/path/to/store/kinetics'

Expand source code Browse git
class Kinetics600(Kinetics700):
    """Kinetics-600 variant; inherits every accessor from Kinetics700."""

    def __init__(self, datadir):
        """Kinetics, provide a datadir='/path/to/store/kinetics'

        Only records where the data lives: the result of remkdir(datadir),
        the tarball URL and the dataset name.  This constructor neither
        checks for nor triggers a download.
        """
        root = remkdir(datadir)
        self.datadir = root
        self._name = 'kinetics600'
        self._url = 'https://storage.googleapis.com/deepmind-media/Datasets/kinetics600.tar.gz'

Ancestors

vipy.data.kinetics.Kinetics700

class Kinetics700 (datadir)

Kinetics, provide a datadir='/path/to/store/kinetics'

Expand source code Browse git
class Kinetics700(object):
    """Accessor for the Kinetics-700 annotation tarball.

    The tarball at ``self._url`` is expected to unpack into
    ``<datadir>/<self._name>/`` containing ``train.json``, ``test.json`` and
    ``validate.json``.  Subclasses only need to set ``datadir``, ``_url`` and
    ``_name``; every method below derives its paths from those attributes.
    """

    def __init__(self, datadir):
        """Kinetics, provide a datadir='/path/to/store/kinetics'

        Stores ``remkdir(datadir)`` as ``self.datadir`` and calls ``download()``
        when ``isdownloaded()`` reports that nothing is present yet.
        """
        self.datadir = remkdir(datadir)
        self._url = 'https://storage.googleapis.com/deepmind-media/Datasets/kinetics700.tar.gz'
        self._name = 'kinetics700'
        # The check is the public isdownloaded(); this class defines no
        # _isdownloaded(), so calling that name raised AttributeError.
        if not self.isdownloaded():
            self.download()

    def __repr__(self):
        """Return '<vipy.data.NAME: "DATADIR/NAME">'."""
        return str('<vipy.data.%s: "%s/%s">' % (self._name, self.datadir, self._name))

    def download(self, verbose=True):
        """Fetch and unpack ``self._url`` into ``self.datadir``; returns self for chaining."""
        vipy.downloader.download_and_unpack(self._url, self.datadir, verbose=verbose)
        return self

    def isdownloaded(self):
        """Return True if this dataset's tarball or its unpacked train.json exists in datadir.

        The tarball name is derived from ``self._name`` so that subclasses
        (kinetics600, kinetics400) check for their own archive rather than
        the kinetics700 one.
        """
        tarball = os.path.join(self.datadir, '%s.tar.gz' % self._name)
        trainjson = os.path.join(self.datadir, self._name, 'train.json')
        return os.path.exists(tarball) or os.path.exists(trainjson)

    def _dataset(self, jsonfile):
        """Load one split file into a list of VideoCategory objects.

        Args:
            jsonfile: path to a JSON file mapping a youtube id to a dict with
                keys 'url' and 'annotations'; 'annotations' carries 'label'
                and 'segment' (a [start, end] pair, converted to float seconds).

        Returns:
            One VideoCategory per entry, with filename
            ``<datadir>/<name>/<youtubeid>``.

        Raises:
            AssertionError: if isdownloaded() is False.
        """
        assert self.isdownloaded(), "Dataset not downloaded.  download() first or manually download '%s' to '%s' and unpack the tarball there" % (self._url, self.datadir)
        return [VideoCategory(url=v['url'],
                              filename=os.path.join(self.datadir, self._name, youtubeid),
                              category=v['annotations']['label'],
                              startsec=float(v['annotations']['segment'][0]),
                              endsec=float(v['annotations']['segment'][1]))
                for (youtubeid, v) in readjson(jsonfile).items()]

    def trainset(self):
        """Return the list of VideoCategory objects described by train.json."""
        return self._dataset(os.path.join(self.datadir, self._name, 'train.json'))

    def testset(self):
        """Return the list of VideoCategory objects described by test.json."""
        return self._dataset(os.path.join(self.datadir, self._name, 'test.json'))

    def valset(self):
        """Return the list of VideoCategory objects described by validate.json."""
        return self._dataset(os.path.join(self.datadir, self._name, 'validate.json'))

    def categories(self):
        """Return the set of distinct annotation labels found in train.json."""
        jsonfile = os.path.join(self.datadir, self._name, 'train.json')
        return {v['annotations']['label'] for v in readjson(jsonfile).values()}

    def analysis(self):
        """Print the 10 largest and 10 smallest training categories and return all counts.

        Returns:
            dict mapping category -> number of training instances.
        """
        d_category_to_trainsize = {k: len(v) for (k, v) in groupbyasdict(self.trainset(), lambda x: x.category()).items()}

        # Sort once, ascending by count; both slices below come from this one
        # ordering, so the largest category is the last element of top10.
        by_count = sorted(d_category_to_trainsize.items(), key=lambda x: x[1])

        top10 = by_count[-10:]
        print('top10 categories by number of instances in training set:')
        print(top10)

        bottom10 = by_count[0:10]
        print('bottom-10 categories by number of instances in training set:')
        print(bottom10)

        return d_category_to_trainsize

Subclasses

vipy.data.kinetics.Kinetics400
vipy.data.kinetics.Kinetics600

Methods

def analysis(self)
Expand source code Browse git
def analysis(self):
    """Print the 10 largest and 10 smallest training categories and return all counts.

    Groups ``self.trainset()`` by each element's ``category()`` and counts the
    members of every group.

    Returns:
        dict mapping category -> number of training instances.
    """
    d_category_to_trainsize = {k: len(v) for (k, v) in groupbyasdict(self.trainset(), lambda x: x.category()).items()}

    # Sort once, ascending by count; both slices below come from this one
    # ordering, so the largest category is the last element of top10.
    by_count = sorted(d_category_to_trainsize.items(), key=lambda x: x[1])

    top10 = by_count[-10:]
    print('top10 categories by number of instances in training set:')
    print(top10)

    bottom10 = by_count[0:10]
    print('bottom-10 categories by number of instances in training set:')
    print(bottom10)

    return d_category_to_trainsize
def categories(self)
Expand source code Browse git
def categories(self):
    """Return the set of distinct annotation labels found in train.json."""
    train_path = os.path.join(self.datadir, self._name, 'train.json')
    records = readjson(train_path)
    return {entry['annotations']['label'] for entry in records.values()}
def download(self, verbose=True)
Expand source code Browse git
def download(self, verbose=True):
    """Fetch and unpack the dataset tarball, then return self for chaining.

    Args:
        verbose: forwarded unchanged to vipy.downloader.download_and_unpack.
    """
    source, destination = self._url, self.datadir
    vipy.downloader.download_and_unpack(source, destination, verbose=verbose)
    return self
def isdownloaded(self)
Expand source code Browse git
def isdownloaded(self):
    """Return True if this dataset's tarball or its unpacked train.json exists in datadir.

    The tarball name is derived from ``self._name`` so that an instance named
    kinetics600 or kinetics400 checks for its own archive rather than the
    kinetics700 one.
    """
    tarball = os.path.join(self.datadir, '%s.tar.gz' % self._name)
    trainjson = os.path.join(self.datadir, self._name, 'train.json')
    return os.path.exists(tarball) or os.path.exists(trainjson)
def testset(self)
Expand source code Browse git
def testset(self):
    return self._dataset(os.path.join(self.datadir, self._name, 'test.json'))
def trainset(self)
Expand source code Browse git
def trainset(self):
    """Return whatever self._dataset() builds from <datadir>/<name>/train.json."""
    split_path = os.path.join(self.datadir, self._name, 'train.json')
    return self._dataset(split_path)
def valset(self)
Expand source code Browse git
def valset(self):
    """Return whatever self._dataset() builds from <datadir>/<name>/validate.json."""
    split_path = os.path.join(self.datadir, self._name, 'validate.json')
    return self._dataset(split_path)