Module vipy.data.ethzshapes
Expand source code Browse git
import os
import vipy
from vipy.util import remkdir, isjpg
from vipy.image import ImageDetection
import vipy.downloader
# Archive that is downloaded and unpacked into the dataset directory
URL = (
    'https://ethz.ch/content/dam/ethz/special-interest/itet/cvl/vision-dam/'
    'datasets/Dataset-information/ethz_shape_classes_v12.tgz'
)

# Checksum handed to the downloader (as sha1=) together with URL
SHA1 = 'ae9b8fad2d170e098e5126ea9181d0843505a84b'

# Directory under the dataset directory that holds one folder per category
SUBDIR = 'ETHZShapeClasses-V1.2'

# Category folder names under SUBDIR; each name is also used as the category label
LABELS = [
    'Applelogos',
    'Bottles',
    'Giraffes',
    'Mugs',
    'Swans',
]
class ETHZShapes(vipy.dataset.Dataset):
    """ETHZ Shape Classes dataset, one element per ground truth bounding box.

    Each element is loaded on demand as an ImageDetection built from a raw
    tuple (image path, category, xmin, ymin, xmax, ymax).
    """

    def __init__(self, datadir=None, redownload=False):
        """ETHZShapes, provide a datadir='/path/to/store/ethzshapes'

        Args:
            datadir [str]: directory the archive is downloaded and unpacked into.
                If None, the path returned by tocache('ethzshapes') is used.
            redownload [bool]: if True, download and unpack again even when the
                '.complete' marker file is already present in datadir.

        Raises:
            FileNotFoundError: if an image has neither a singular nor a plural
                '.groundtruth' file next to it.
            ValueError: if a non-blank ground truth line does not contain exactly
                four whitespace separated fields.
        """
        if datadir is None:
            # tocache is not among this module's top-level imports, so bring it
            # into scope here; otherwise the default datadir raises NameError
            from vipy.util import tocache
            datadir = tocache('ethzshapes')
        self._datadir = remkdir(datadir)

        marker = os.path.join(self._datadir, '.complete')
        if redownload or not os.path.exists(marker):
            vipy.downloader.download_and_unpack(URL, self._datadir, sha1=SHA1)

        imlist = []
        for category in LABELS:
            imdir = os.path.join(self._datadir, SUBDIR, category)
            for filename in os.listdir(imdir):
                # Only visible jpg files are images; skip everything else
                if not isjpg(filename) or filename.startswith('.'):
                    continue
                im = os.path.join(imdir, filename)

                # Ground truth lives next to the image as <stem>_<category>.groundtruth
                gtstem = os.path.join(imdir, os.path.splitext(os.path.basename(filename))[0] + '_' + category.lower())
                gtfile = gtstem + '.groundtruth'
                if not os.path.isfile(gtfile):
                    gtfile = gtstem + 's.groundtruth'  # plural hack

                # One box per non-blank line; the context manager closes the file
                # so that a handle is not leaked for every image
                with open(gtfile, 'r') as f:
                    for line in f:
                        fields = line.split()
                        if not fields:
                            continue
                        (xmin, ymin, xmax, ymax) = fields
                        imlist.append((im, category, xmin, ymin, xmax, ymax))

        loader = lambda x: ImageDetection(filename=x[0], category=x[1], xmin=x[2], ymin=x[3], xmax=x[4], ymax=x[5])
        super().__init__(imlist, id='ethzshapes', loader=loader)

        # Written last, so the marker only exists once the dataset was indexed successfully
        open(marker, 'a').close()
Classes
class ETHZShapes (datadir=None, redownload=False)-
vipy.dataset.Dataset() class
Common class to manipulate large sets of objects in parallel
Args
- dataset [list, tuple, set, obj]: a python built-in type that supports indexing or a generic object that supports indexing and has a length
- id [str]: an optional id of this dataset, which provides a descriptive name of the dataset
- loader [callable]: a callable loader that will construct the object from a raw data element in dataset. This is useful for custom deserialization or on demand transformations.
Datasets can be indexed, shuffled, iterated, minibatched, sorted, sampled, partitioned. Datasets constructed of vipy objects are lazy loaded, delaying loading pixels until they are needed.
(trainset, valset, testset) = vipy.dataset.registry('mnist')
(trainset, valset) = trainset.partition(0.9, 0.1)
categories = trainset.set(lambda im: im.category())
smaller = testset.take(1024)
preprocessed = smaller.map(lambda im: im.resize(32, 32).gain(1/256))
for b in preprocessed.minibatch(128):
    print(b)

# visualize the dataset
(trainset, valset, testset) = vipy.dataset.registry('pascal_voc_2007')
for im in trainset:
    im.mindim(1024).show().print(sleep=1).close()

Datasets can be constructed from directories of json files or image files (Dataset.from_directory()).
Datasets can be constructed from a single json file containing a list of objects (Dataset.from_json()).
Note: if a lambda function is provided as loader, then this dataset is not serializable. Use self.load() then serialize.
ETHZShapes, provide a datadir='/path/to/store/ethzshapes'
Expand source code Browse git
class ETHZShapes(vipy.dataset.Dataset):
    """ETHZ Shape Classes dataset, one element per ground truth bounding box.

    Each element is loaded on demand as an ImageDetection built from a raw
    tuple (image path, category, xmin, ymin, xmax, ymax).
    """

    def __init__(self, datadir=None, redownload=False):
        """ETHZShapes, provide a datadir='/path/to/store/ethzshapes'

        Args:
            datadir [str]: directory the archive is downloaded and unpacked into.
                If None, the path returned by tocache('ethzshapes') is used.
            redownload [bool]: if True, download and unpack again even when the
                '.complete' marker file is already present in datadir.

        Raises:
            FileNotFoundError: if an image has neither a singular nor a plural
                '.groundtruth' file next to it.
            ValueError: if a non-blank ground truth line does not contain exactly
                four whitespace separated fields.
        """
        if datadir is None:
            # tocache is not among this module's top-level imports, so bring it
            # into scope here; otherwise the default datadir raises NameError
            from vipy.util import tocache
            datadir = tocache('ethzshapes')
        self._datadir = remkdir(datadir)

        marker = os.path.join(self._datadir, '.complete')
        if redownload or not os.path.exists(marker):
            vipy.downloader.download_and_unpack(URL, self._datadir, sha1=SHA1)

        imlist = []
        for category in LABELS:
            imdir = os.path.join(self._datadir, SUBDIR, category)
            for filename in os.listdir(imdir):
                # Only visible jpg files are images; skip everything else
                if not isjpg(filename) or filename.startswith('.'):
                    continue
                im = os.path.join(imdir, filename)

                # Ground truth lives next to the image as <stem>_<category>.groundtruth
                gtstem = os.path.join(imdir, os.path.splitext(os.path.basename(filename))[0] + '_' + category.lower())
                gtfile = gtstem + '.groundtruth'
                if not os.path.isfile(gtfile):
                    gtfile = gtstem + 's.groundtruth'  # plural hack

                # One box per non-blank line; the context manager closes the file
                # so that a handle is not leaked for every image
                with open(gtfile, 'r') as f:
                    for line in f:
                        fields = line.split()
                        if not fields:
                            continue
                        (xmin, ymin, xmax, ymax) = fields
                        imlist.append((im, category, xmin, ymin, xmax, ymax))

        loader = lambda x: ImageDetection(filename=x[0], category=x[1], xmin=x[2], ymin=x[3], xmax=x[4], ymax=x[5])
        super().__init__(imlist, id='ethzshapes', loader=loader)

        # Written last, so the marker only exists once the dataset was indexed successfully
        open(marker, 'a').close()
Ancestors
Inherited members
Dataset: balanced, batch, chunk, chunks, clone, count, even_split, filter, frequency, from_directory, from_image_urls, groupby, id, identity_shuffler, index, inverse_frequency, list, load, localmap, map, minibatch, partition, pipeline, raw, repeat, sample, set, shift, shuffle, slice, sort, split, streaming_map, streaming_shuffler, take, take_fraction, takeby, takelist, takeone, truncate, tuple, uniform_shuffler, zip