Module vipy.data.lfw
Expand source code Browse git
import os
import numpy as np
from vipy.util import remkdir, filetail, dirlist, imlist, readcsv
from vipy.image import ImageCategory
import vipy.downloader
# Remote locations of the LFW distribution files (all hosted under vis-www.cs.umass.edu/lfw).
# Image archive (lfw.tgz); downloaded and unpacked into the data directory by LFW._download().
URL = 'http://vis-www.cs.umass.edu/lfw/lfw.tgz'
# lfw-names.txt; presumably the list of subject names -- not referenced by the code in this module.
URL_NAMES = 'http://vis-www.cs.umass.edu/lfw/lfw-names.txt'
# Pairs files; LFW._parse_pairs() reads local copies named pairsDevTrain.txt, pairsDevTest.txt and pairs.txt.
URL_PAIRS_DEV_TRAIN = 'http://vis-www.cs.umass.edu/lfw/pairsDevTrain.txt'
URL_PAIRS_DEV_TEST = 'http://vis-www.cs.umass.edu/lfw/pairsDevTest.txt'
URL_PAIRS_VIEW2 = 'http://vis-www.cs.umass.edu/lfw/pairs.txt'
class LFW(vipy.dataset.Dataset):
    """LFW (Labeled Faces in the Wild) dataset rooted at a local data directory.

    Images are expected under ``<datadir>/lfw/<subject>/``.  The dataset
    elements are `ImageCategory` objects whose category is the subject
    (directory) name.
    """

    def __init__(self, datadir):
        """Datadir contains the unpacked contents of LFW from $URL -> /path/to/lfw

        Args:
            datadir: directory holding (or that will hold) ``lfw.tgz`` and the
                unpacked ``lfw/`` image tree.

        The archive is downloaded and unpacked when ``<datadir>/lfw.tgz`` is
        not present.
        """
        self.lfwdir = datadir
        # NOTE(review): remkdir presumably creates the directory when missing -- confirm in vipy.util.
        remkdir(os.path.join(self.lfwdir, 'lfw'))
        if not os.path.exists(os.path.join(self.lfwdir, 'lfw.tgz')):
            self._download()
        super().__init__(self._dataset(), 'lfw')

    def _download(self, verbose=True):
        """Download and unpack the LFW archive from URL into the data directory; returns self."""
        vipy.downloader.download_and_unpack(URL, self.lfwdir, verbose=verbose)
        return self

    def subjects(self):
        """List of all subject names"""
        return [filetail(d) for d in dirlist(os.path.join(self.lfwdir, 'lfw'))]

    def subject_images(self, subject):
        """List of Images of a subject"""
        fnames = imlist(os.path.join(self.lfwdir, 'lfw', subject))
        return [ImageCategory(category=subject, filename=f) for f in fnames]

    def _dataset(self):
        """Return one ImageCategory per image file of every subject directory."""
        return [ImageCategory(category=s, filename=f)
                for s in self.subjects()
                for f in imlist(os.path.join(self.lfwdir, 'lfw', s))]

    def _pair_image(self, subject, index):
        """Return the ImageCategory for image number `index` of `subject` (file ``<subject>_NNNN.jpg``)."""
        filename = os.path.join(self.lfwdir, 'lfw', subject, '%s_%04d.jpg' % (subject, int(index)))
        return ImageCategory(category=subject, filename=filename)

    def _parse_pairs(self, txtfile):
        """Parse a tab-separated pairs file located in ``<datadir>/lfw``.

        Rows with three fields ``(name, n1, n2)`` yield two images of the same
        subject; rows with four fields ``(name1, n1, name2, n2)`` yield one
        image of each subject.  Rows of any other length are skipped.

        Returns:
            A list of ``(ImageCategory, ImageCategory)`` tuples.
        """
        pairs = []
        for x in readcsv(os.path.join(self.lfwdir, 'lfw', txtfile), separator='\t'):
            if len(x) == 3:
                pairs.append((self._pair_image(x[0], x[1]), self._pair_image(x[0], x[2])))
            elif len(x) == 4:
                pairs.append((self._pair_image(x[0], x[1]), self._pair_image(x[2], x[3])))
            # Any other row length is ignored on purpose (presumably header/count lines).
        return pairs

    def _load_pairs(self, txtfile, url):
        """Parse ``<datadir>/lfw/<txtfile>``, raising ValueError with a download hint for `url` if it is missing."""
        pairsfile = os.path.join(self.lfwdir, 'lfw', txtfile)
        if not os.path.isfile(pairsfile):
            raise ValueError("Download and save text file to $datadir/lfw/%s with 'wget %s -O %s'" % (txtfile, url, pairsfile))
        return self._parse_pairs(txtfile)

    def _pairsDevTest(self):
        """Image pairs listed in pairsDevTest.txt; raises ValueError if the file is missing."""
        return self._load_pairs('pairsDevTest.txt', URL_PAIRS_DEV_TEST)

    def _pairsDevTrain(self):
        """Image pairs listed in pairsDevTrain.txt; raises ValueError if the file is missing."""
        return self._load_pairs('pairsDevTrain.txt', URL_PAIRS_DEV_TRAIN)

    def _pairs(self):
        """Image pairs listed in pairs.txt; raises ValueError if the file is missing."""
        return self._load_pairs('pairs.txt', URL_PAIRS_VIEW2)
Classes
class LFW (datadir)
-
vipy.dataset.Dataset() class
Common class to manipulate large sets of vipy objects in parallel
D = vipy.dataset.Dataset([vipy.video.RandomScene(), vipy.video.RandomScene()], id='random_scene')
with vipy.globals.parallel(2):
    D = D.map(lambda v: v.frame(0))
list(D)
Create dataset and export as a directory of json files
D = vipy.dataset.Dataset([vipy.video.RandomScene(), vipy.video.RandomScene()])
D.tojsondir('/tmp/myjsondir')
Create dataset from all json or pkl files recursively discovered in a directory and lazy loaded
D = vipy.dataset.Dataset('/tmp/myjsondir') # lazy loading
Create dataset from a list of json or pkl files and lazy loaded
D = vipy.dataset.Dataset(['/path/to/file1.json', '/path/to/file2.json']) # lazy loading
Args
- abspath [bool]: If true, load all lazy elements with absolute path
- loader [lambda]: a callable loader that will process the object. This is useful for custom deserialization
- lazy [bool]: If true, load all pkl or json files using the custom loader when accessed
Notes: Be warned that using the jsondir constructor will load elements on demand, but there are some methods that require loading the entire dataset into memory, and will happily try to do so
Datadir contains the unpacked contents of LFW from $URL -> /path/to/lfw
Expand source code Browse git
class LFW(vipy.dataset.Dataset):
    """LFW (Labeled Faces in the Wild) dataset rooted at a local data directory.

    Images are expected under ``<datadir>/lfw/<subject>/``.  The dataset
    elements are `ImageCategory` objects whose category is the subject
    (directory) name.
    """

    def __init__(self, datadir):
        """Datadir contains the unpacked contents of LFW from $URL -> /path/to/lfw

        Args:
            datadir: directory holding (or that will hold) ``lfw.tgz`` and the
                unpacked ``lfw/`` image tree.

        The archive is downloaded and unpacked when ``<datadir>/lfw.tgz`` is
        not present.
        """
        self.lfwdir = datadir
        # NOTE(review): remkdir presumably creates the directory when missing -- confirm in vipy.util.
        remkdir(os.path.join(self.lfwdir, 'lfw'))
        if not os.path.exists(os.path.join(self.lfwdir, 'lfw.tgz')):
            self._download()
        super().__init__(self._dataset(), 'lfw')

    def _download(self, verbose=True):
        """Download and unpack the LFW archive from URL into the data directory; returns self."""
        vipy.downloader.download_and_unpack(URL, self.lfwdir, verbose=verbose)
        return self

    def subjects(self):
        """List of all subject names"""
        return [filetail(d) for d in dirlist(os.path.join(self.lfwdir, 'lfw'))]

    def subject_images(self, subject):
        """List of Images of a subject"""
        fnames = imlist(os.path.join(self.lfwdir, 'lfw', subject))
        return [ImageCategory(category=subject, filename=f) for f in fnames]

    def _dataset(self):
        """Return one ImageCategory per image file of every subject directory."""
        return [ImageCategory(category=s, filename=f)
                for s in self.subjects()
                for f in imlist(os.path.join(self.lfwdir, 'lfw', s))]

    def _pair_image(self, subject, index):
        """Return the ImageCategory for image number `index` of `subject` (file ``<subject>_NNNN.jpg``)."""
        filename = os.path.join(self.lfwdir, 'lfw', subject, '%s_%04d.jpg' % (subject, int(index)))
        return ImageCategory(category=subject, filename=filename)

    def _parse_pairs(self, txtfile):
        """Parse a tab-separated pairs file located in ``<datadir>/lfw``.

        Rows with three fields ``(name, n1, n2)`` yield two images of the same
        subject; rows with four fields ``(name1, n1, name2, n2)`` yield one
        image of each subject.  Rows of any other length are skipped.

        Returns:
            A list of ``(ImageCategory, ImageCategory)`` tuples.
        """
        pairs = []
        for x in readcsv(os.path.join(self.lfwdir, 'lfw', txtfile), separator='\t'):
            if len(x) == 3:
                pairs.append((self._pair_image(x[0], x[1]), self._pair_image(x[0], x[2])))
            elif len(x) == 4:
                pairs.append((self._pair_image(x[0], x[1]), self._pair_image(x[2], x[3])))
            # Any other row length is ignored on purpose (presumably header/count lines).
        return pairs

    def _load_pairs(self, txtfile, url):
        """Parse ``<datadir>/lfw/<txtfile>``, raising ValueError with a download hint for `url` if it is missing."""
        pairsfile = os.path.join(self.lfwdir, 'lfw', txtfile)
        if not os.path.isfile(pairsfile):
            raise ValueError("Download and save text file to $datadir/lfw/%s with 'wget %s -O %s'" % (txtfile, url, pairsfile))
        return self._parse_pairs(txtfile)

    def _pairsDevTest(self):
        """Image pairs listed in pairsDevTest.txt; raises ValueError if the file is missing."""
        return self._load_pairs('pairsDevTest.txt', URL_PAIRS_DEV_TEST)

    def _pairsDevTrain(self):
        """Image pairs listed in pairsDevTrain.txt; raises ValueError if the file is missing."""
        return self._load_pairs('pairsDevTrain.txt', URL_PAIRS_DEV_TRAIN)

    def _pairs(self):
        """Image pairs listed in pairs.txt; raises ValueError if the file is missing."""
        return self._load_pairs('pairs.txt', URL_PAIRS_VIEW2)
Ancestors
Methods
def subject_images(self, subject)
-
List of Images of a subject
Expand source code Browse git
def subject_images(self, subject):
    """Return an ImageCategory for every image file found in the subject's directory."""
    subject_dir = os.path.join(self.lfwdir, 'lfw', subject)
    images = []
    for path in imlist(subject_dir):
        images.append(ImageCategory(category=subject, filename=path))
    return images
def subjects(self)
-
List of all subject names
Expand source code Browse git
def subjects(self):
    """Return the subject names, one per directory found under ``<datadir>/lfw``."""
    image_root = os.path.join(self.lfwdir, 'lfw')
    names = []
    for subject_dir in dirlist(image_root):
        names.append(filetail(subject_dir))
    return names
Inherited members
Dataset
:archive
categories
chunk
class_to_index
classes
classlist
clone
count
countby
density
duration_in_seconds
filter
flatten
id
index_to_class
inverse_frequency_weight
istype
jsondir
label_to_index
list
load
map
merge
minibatch
multilabel_inverse_frequency_weight
num_categories
num_classes
num_labels
percentage
replace
save
set
shuffle
shuffler
sort
split
split_by_videoid
synonym
take
take_per_category
takefilter
takelist
takeone
to_torch
to_torch_tensordir
tohtml
tojsondir
tolist
video_duration_in_seconds
video_montage
zip