Module vipy.data.ucf101
Expand source code Browse git
import os
from vipy.dataset import Dataset
from vipy.video import VideoCategory
from vipy.util import remkdir, filetail, isvideo, isinstalled
import vipy.downloader
import re
# Location of the UCF101 archive; UCF101.__init__ downloads it with wget when
# the file is not already present in the data directory.
URL = 'https://www.crcv.ucf.edu/data/UCF101/UCF101.rar'
# Presumably the expected SHA1 digest of the archive; it is unset and nothing
# in the visible source reads it, so the download is not integrity-checked here.
SHA1 = None
class UCF101(Dataset):
    """UCF101 action-recognition videos as a dataset of `VideoCategory` objects.

    The archive at `URL` is downloaded into `datadir` and unpacked there on
    first use; every video file found in `datadir` then becomes one element
    whose category is the second underscore-separated token of its filename
    (presumably UCF101's `v_<Category>_g<group>_c<clip>` naming -- confirm).
    """

    def __init__(self, datadir):
        """Fetch and unpack the archive if needed, then index the videos.

        Args:
            datadir: directory holding the archive and the extracted videos.
                It is passed through `remkdir` and the result is stored as
                `self.datadir`.

        Raises:
            ValueError: if the archive must be downloaded but `wget` is not
                installed, or must be unpacked but `unrar` is not installed.
        """
        # Function-scope import so this class does not depend on a change to
        # the module's import block.
        import subprocess

        self.datadir = remkdir(datadir)
        archive = os.path.join(self.datadir, filetail(URL))

        if not os.path.exists(archive):
            if not isinstalled('wget'):
                raise ValueError('Downloading requires the wget utility on the command line. On Ubuntu: "sudo apt install wget"')
            # server fails many times, need smart continue
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact. The exit status is deliberately
            # not checked, matching the previous best-effort behavior.
            subprocess.run(['wget', '--no-check-certificate', '--continue', '--tries=32', '-O', archive, URL])

        # Fewer than two videos on disk is taken to mean "not yet unpacked".
        if not len(vipy.util.videolist(self.datadir)) > 1:
            if not isinstalled('unrar'):
                raise ValueError('Unpacking requires the unrar utility on the command line. On Ubuntu: "sudo apt install unrar"')
            # 'e' extracts without archived paths, so all videos land directly
            # in self.datadir.
            subprocess.run(['unrar', 'e', archive, self.datadir])

        super().__init__(
            [VideoCategory(filename=f, category=filetail(f).split('_')[1]) for f in vipy.util.videolist(self.datadir)],
            'ucf101',
        )

    def as_space_separated_category(self):
        """Return this dataset mapped so each category is a space separated phrase.

        Every element is replaced by `im.new_category(...)` with the category
        converted by `to_space_separated_category`.
        """
        return self.map(lambda im: im.new_category(UCF101.to_space_separated_category(im.category())))

    @staticmethod
    def to_space_separated_category(c):
        """Convert CamelCase to a space separated phrase.

        Only runs of one uppercase ASCII letter followed by lowercase ASCII
        letters are kept, e.g. 'ApplyEyeMakeup' -> 'Apply Eye Makeup'; any
        other characters in `c` are dropped.
        """
        return ' '.join(re.findall(r'[A-Z][a-z]*', c))
Classes
class UCF101 (datadir)-
vipy.dataset.Dataset() class
Common class to manipulate large sets of objects in parallel
Args
- dataset [list, tuple, set, obj]: a python built-in type that supports indexing or a generic object that supports indexing and has a length
- id [str]: an optional id of this dataset, which provides a descriptive name of the dataset
- loader [callable]: a callable loader that will construct the object from a raw data element in dataset. This is useful for custom deserialization or on-demand transformations.
Datasets can be indexed, shuffled, iterated, minibatched, sorted, sampled, partitioned. Datasets constructed of vipy objects are lazy loaded, delaying loading pixels until they are needed.
(trainset, valset, testset) = vipy.dataset.registry('mnist')
(trainset, valset) = trainset.partition(0.9, 0.1)
categories = trainset.set(lambda im: im.category())
smaller = testset.take(1024)
preprocessed = smaller.map(lambda im: im.resize(32, 32).gain(1/256))
for b in preprocessed.minibatch(128):
    print(b)

# visualize the dataset
(trainset, valset, testset) = vipy.dataset.registry('pascal_voc_2007')
for im in trainset:
    im.mindim(1024).show().print(sleep=1).close()

Datasets can be constructed from directories of json files or image files (Dataset.from_directory()).
Datasets can be constructed from a single json file containing a list of objects (Dataset.from_json()).
Note: if a lambda function is provided as loader, then this dataset is not serializable. Use self.load() then serialize.
Expand source code Browse git
class UCF101(Dataset):
    """UCF101 action-recognition videos as a dataset of `VideoCategory` objects.

    The archive at `URL` is downloaded into `datadir` and unpacked there on
    first use; every video file found in `datadir` then becomes one element
    whose category is the second underscore-separated token of its filename
    (presumably UCF101's `v_<Category>_g<group>_c<clip>` naming -- confirm).
    """

    def __init__(self, datadir):
        """Fetch and unpack the archive if needed, then index the videos.

        Args:
            datadir: directory holding the archive and the extracted videos.
                It is passed through `remkdir` and the result is stored as
                `self.datadir`.

        Raises:
            ValueError: if the archive must be downloaded but `wget` is not
                installed, or must be unpacked but `unrar` is not installed.
        """
        # Function-scope import so this class does not depend on a change to
        # the module's import block.
        import subprocess

        self.datadir = remkdir(datadir)
        archive = os.path.join(self.datadir, filetail(URL))

        if not os.path.exists(archive):
            if not isinstalled('wget'):
                raise ValueError('Downloading requires the wget utility on the command line. On Ubuntu: "sudo apt install wget"')
            # server fails many times, need smart continue
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact. The exit status is deliberately
            # not checked, matching the previous best-effort behavior.
            subprocess.run(['wget', '--no-check-certificate', '--continue', '--tries=32', '-O', archive, URL])

        # Fewer than two videos on disk is taken to mean "not yet unpacked".
        if not len(vipy.util.videolist(self.datadir)) > 1:
            if not isinstalled('unrar'):
                raise ValueError('Unpacking requires the unrar utility on the command line. On Ubuntu: "sudo apt install unrar"')
            # 'e' extracts without archived paths, so all videos land directly
            # in self.datadir.
            subprocess.run(['unrar', 'e', archive, self.datadir])

        super().__init__(
            [VideoCategory(filename=f, category=filetail(f).split('_')[1]) for f in vipy.util.videolist(self.datadir)],
            'ucf101',
        )

    def as_space_separated_category(self):
        """Return this dataset mapped so each category is a space separated phrase.

        Every element is replaced by `im.new_category(...)` with the category
        converted by `to_space_separated_category`.
        """
        return self.map(lambda im: im.new_category(UCF101.to_space_separated_category(im.category())))

    @staticmethod
    def to_space_separated_category(c):
        """Convert CamelCase to a space separated phrase.

        Only runs of one uppercase ASCII letter followed by lowercase ASCII
        letters are kept, e.g. 'ApplyEyeMakeup' -> 'Apply Eye Makeup'; any
        other characters in `c` are dropped.
        """
        return ' '.join(re.findall(r'[A-Z][a-z]*', c))
Ancestors
Static methods
def to_space_separated_category(c)-
Convert CamelCase to a space separated phrase
Expand source code Browse git
@staticmethod
def to_space_separated_category(c):
    """Convert CamelCase to a space separated phrase.

    Only runs of one uppercase ASCII letter followed by lowercase ASCII
    letters are kept, e.g. 'ApplyEyeMakeup' -> 'Apply Eye Makeup'; any other
    characters in `c` are dropped.
    """
    return ' '.join(re.findall(r'[A-Z][a-z]*', c))
Methods
def as_space_separated_category(self)-
Expand source code Browse git
def as_space_separated_category(self):
    """Return this dataset mapped so each category is a space separated phrase.

    Every element is replaced by `im.new_category(...)` with the category
    converted by `UCF101.to_space_separated_category`.
    """
    return self.map(lambda im: im.new_category(UCF101.to_space_separated_category(im.category())))
Inherited members
Dataset: balanced, batch, chunk, chunks, clone, count, even_split, filter, frequency, from_directory, from_image_urls, groupby, id, identity_shuffler, index, inverse_frequency, list, load, localmap, map, minibatch, partition, pipeline, raw, repeat, sample, set, shift, shuffle, slice, sort, split, streaming_map, streaming_shuffler, take, take_fraction, takeby, takelist, takeone, truncate, tuple, uniform_shuffler, zip