Module vipy.data.vggface2
import os
import numpy as np
from vipy.util import dirlist, imlist, readcsv, filebase, readlist, groupbyasdict
from vipy.image import ImageDetection


class VGGFace2(object):
    def __init__(self, datadir, seed=None):
        assert os.path.isdir(os.path.join(datadir, 'n000001')) and os.path.exists(os.path.join(datadir, 'identity_meta.csv')), 'Download and unpack VGGFace2 data and metadata (http://www.robots.ox.ac.uk/~vgg/data/vgg_face2/) to "%s"' % datadir
        self.datadir = datadir
        self._subjects = None
        if seed is not None:
            np.random.seed(seed)  # for repeatable np.random

    def __repr__(self):
        return str('<vipy.data.vggface2: %s>' % self.datadir)

    def subjects(self):
        if self._subjects is None:
            self._subjects = [filebase(d) for d in dirlist(self.datadir)]
        return self._subjects  # cached

    def wordnetid_to_name(self):
        csv = readcsv(os.path.join(self.datadir, 'identity_meta.csv'), ignoreheader=True)
        return {str(x[0]): str(x[1]).replace('"', '') for x in csv}

    def vggface2_to_vggface1(self):
        assert os.path.exists(os.path.join(self.datadir, 'class_overlap_vgg1_2.txt')), 'Download class_overlap_vgg1_2.txt to "%s"' % self.datadir
        csv = readcsv(os.path.join(self.datadir, 'class_overlap_vgg1_2.txt'), separator=' ', ignoreheader=True)
        return {x[0]: x[1] for x in csv}

    def name_to_wordnetid(self):
        d = self.wordnetid_to_name()
        return {v: k for (k, v) in d.items()}

    def names(self):
        return list(self.wordnetid_to_name().values())

    def trainset(self):
        assert os.path.exists(os.path.join(self.datadir, 'train_list.txt')), 'Download "train_list.txt" from http://www.robots.ox.ac.uk/~vgg/data/vgg_face2/ to "%s"' % self.datadir
        csv = readlist(os.path.join(self.datadir, 'train_list.txt'))
        return [os.path.join(self.datadir, x).strip() for x in csv]

    def testset(self):
        assert os.path.exists(os.path.join(self.datadir, 'test_list.txt')), 'Download "test_list.txt" from http://www.robots.ox.ac.uk/~vgg/data/vgg_face2/ to "%s"' % self.datadir
        csv = readlist(os.path.join(self.datadir, 'test_list.txt'))
        return [os.path.join(self.datadir, x).strip() for x in csv]

    def split(self, f):
        """Convert absolute path /path/to/subjectid/filename.jpg from training or testing set to (subjectid, filename.jpg)"""
        x = os.path.split(f)
        subjectid = os.path.split(x[-2])[-1]
        imagefile = x[-1]
        return (subjectid, imagefile)

    def frontalset(self, n_frontal=1):
        # http://www.robots.ox.ac.uk/~vgg/data/vgg_face2/meta/test_posetemp_imglist.txt
        assert(n_frontal >= 1 and n_frontal <= 10)
        assert os.path.exists(os.path.join(self.datadir, 'test_posetemp_imglist.txt')), 'Download "test_posetemp_imglist.txt" from (http://www.robots.ox.ac.uk/~vgg/data/vgg_face2/meta/test_posetemp_imglist.txt) to "%s"' % self.datadir
        d = groupbyasdict([x.strip().split('/') for x in readlist(os.path.join(self.datadir, 'test_posetemp_imglist.txt'))], lambda v: v[0])
        d_subjectid_to_frontallist = {k: [os.path.join(self.datadir, k, y[1]) for y in v[0:n_frontal]] for (k, v) in d.items()}  # first and second set of five are frontal
        for (k, v) in d_subjectid_to_frontallist.items():
            for f in v:
                yield ImageDetection(filename=f).category(k)

    def dataset(self):
        """Return a generator to iterate over dataset"""
        for d in dirlist(os.path.join(self.datadir)):
            for f in imlist(d):
                yield ImageDetection(filename=f).category(filebase(d))

    def fastset(self):
        """Return a generator to iterate over dataset"""
        for d in dirlist(os.path.join(self.datadir)):
            for f in imlist(d):
                yield ImageDetection(filename=f, category=filebase(d))

    def take(self, n, wordnetid=None):
        """Randomly select n images from the dataset, or n images of a given subjectid"""
        subjectid = np.random.choice(self.subjects(), n) if wordnetid is None else [wordnetid] * n
        takelist = []
        for s in subjectid:
            d = os.path.join(self.datadir, s)
            f = np.random.choice(imlist(d), 1)[0]
            im = ImageDetection(filename=f).category(filebase(d))
            takelist.append(im)
        return takelist

    def take_per_subject(self, n):
        """Randomly select n images per subject from the dataset"""
        subjectid = self.subjects()
        takelist = []
        for s in subjectid:
            d = os.path.join(self.datadir, s)
            for k in range(0, n):
                f = np.random.choice(imlist(d), 1)[0]
                im = ImageDetection(filename=f).category(filebase(d))
                takelist.append(im)
        return takelist

    def subjectset(self, wordnetid):
        """Iterator for single subject"""
        assert wordnetid in self.wordnetid_to_name().keys(), 'Invalid wordnetid "%s"' % wordnetid
        d = os.path.join(self.datadir, wordnetid)
        for f in imlist(d):
            yield ImageDetection(filename=f, category=filebase(d))
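As a quick orientation, the sketch below constructs the dataset interface and iterates over a few images; '/datasets/vggface2' is a hypothetical path to the unpacked dataset containing the subject directories and identity_meta.csv.

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2', seed=42)    # hypothetical datadir; seed makes take() repeatable
print(len(vgg.subjects()))                       # number of subject directories (wordnet IDs)
for (k, im) in enumerate(vgg.fastset()):         # lazy generator of vipy.image.ImageDetection
    print(im)                                    # prints the ImageDetection repr; pixels are not loaded here
    if k == 2:
        break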
Classes
class VGGFace2 (datadir, seed=None)
-
Interface to the VGGFace2 face dataset; datadir must contain the unpacked subject directories (e.g. n000001) and identity_meta.csv from http://www.robots.ox.ac.uk/~vgg/data/vgg_face2/.
Methods
def dataset(self)
-
Return a generator to iterate over dataset
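For example, a minimal sketch (hypothetical datadir) that consumes the generator lazily with itertools.islice; fastset() yields the same images, setting the category in the constructor instead of via the setter.

import itertools
from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2')             # hypothetical datadir
ims = list(itertools.islice(vgg.dataset(), 8))   # first eight ImageDetections in directory order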
def fastset(self)
-
Return a generator to iterate over dataset
def frontalset(self, n_frontal=1)
-
Return a generator over up to n_frontal frontal-pose test images per subject, using the pose template list test_posetemp_imglist.txt.
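A hedged usage sketch, assuming test_posetemp_imglist.txt has been downloaded to the hypothetical datadir:

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2')             # hypothetical datadir
for im in vgg.frontalset(n_frontal=2):           # up to two frontal test images per subject
    print(im)                                    # ImageDetection with category set to the subject ID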
def name_to_wordnetid(self)
-
Return a dictionary mapping subject name to wordnet ID (the inverse of wordnetid_to_name()).
def names(self)
-
Return the list of all subject names in identity_meta.csv.
def split(self, f)
-
Convert absolute path /path/to/subjectid/filename.jpg from training or testing set to (subjectid, filename.jpg)
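A small sketch of the expected behavior; the path and filename below are illustrative only.

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2')             # hypothetical datadir
(subjectid, imagefile) = vgg.split('/datasets/vggface2/n000001/0001_01.jpg')
print(subjectid, imagefile)                      # -> n000001 0001_01.jpg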
def subjects(self)
-
Return the cached list of subject wordnet IDs, one per subdirectory of datadir.
def subjectset(self, wordnetid)
-
Iterator for single subject
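A hedged sketch that collects every image of one subject (hypothetical datadir; the wordnetid must appear in identity_meta.csv):

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2')               # hypothetical datadir
imlist_n000001 = list(vgg.subjectset('n000001'))   # all images of subject n000001 as ImageDetections
print(len(imlist_n000001))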
def take(self, n, wordnetid=None)
-
Randomly select n images from the dataset, or n images of a given subjectid
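A hedged sampling sketch; the seed makes the random selection repeatable (hypothetical datadir):

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2', seed=42)    # hypothetical datadir
sampled = vgg.take(16)                           # 16 images drawn from randomly chosen subjects
one_subject = vgg.take(4, wordnetid='n000001')   # 4 random images of a single subject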
def take_per_subject(self, n)
-
Randomly select n images per subject from the dataset
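A hedged sketch for building a small per-subject gallery (hypothetical datadir):

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2', seed=42)    # hypothetical datadir
gallery = vgg.take_per_subject(2)                # two randomly chosen images for every subject
print(len(gallery))                              # equals 2 * len(vgg.subjects())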
def testset(self)
-
Return the list of absolute paths to test images, as listed in test_list.txt.
def trainset(self)
-
Return the list of absolute paths to training images, as listed in train_list.txt.
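A hedged sketch listing the training split and recovering subject IDs from the paths; testset() is used the same way. The datadir is hypothetical and train_list.txt must have been downloaded.

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2')                 # hypothetical datadir
trainfiles = vgg.trainset()                          # absolute paths to training images
(subjectid, imagefile) = vgg.split(trainfiles[0])    # recover (subjectid, filename.jpg) from a path
print(len(trainfiles), subjectid, imagefile)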
def vggface2_to_vggface1(self)
-
Return a dictionary mapping each VGGFace2 identity to the corresponding VGGFace1 identity for subjects present in both datasets, parsed from class_overlap_vgg1_2.txt.
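A hedged sketch, assuming class_overlap_vgg1_2.txt has been downloaded to the hypothetical datadir:

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2')             # hypothetical datadir
overlap = vgg.vggface2_to_vggface1()             # dict of identities shared between VGGFace2 and VGGFace1
print(len(overlap))                              # number of overlapping subjects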
def wordnetid_to_name(self)
-
Return a dictionary mapping wordnet ID to subject name, parsed from identity_meta.csv.
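A hedged lookup sketch (hypothetical datadir); name_to_wordnetid() provides the inverse mapping, assuming subject names are unique.

from vipy.data.vggface2 import VGGFace2

vgg = VGGFace2('/datasets/vggface2')             # hypothetical datadir
d = vgg.wordnetid_to_name()                      # wordnet ID -> subject name
print(d['n000001'])                              # name of the first subject
print(vgg.name_to_wordnetid()[d['n000001']])     # inverse lookup, -> 'n000001' if names are unique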