Module vipy.data.lfw

Expand source code Browse git
import os
import numpy as np
from vipy.util import remkdir, filetail, dirlist, imlist, readcsv
from vipy.image import ImageCategory
import vipy.downloader


URL = 'http://vis-www.cs.umass.edu/lfw/lfw.tgz'
URL_NAMES = 'http://vis-www.cs.umass.edu/lfw/lfw-names.txt'
URL_PAIRS_DEV_TRAIN = 'http://vis-www.cs.umass.edu/lfw/pairsDevTrain.txt'
URL_PAIRS_DEV_TEST = 'http://vis-www.cs.umass.edu/lfw/pairsDevTest.txt'
URL_PAIRS_VIEW2 = 'http://vis-www.cs.umass.edu/lfw/pairs.txt'


class LFW(vipy.dataset.Dataset):
    """LFW dataset exposed as a vipy Dataset of ImageCategory objects.

    Images are enumerated from <datadir>/lfw/<subject>/, and the optional pair
    files (pairs.txt, pairsDevTrain.txt, pairsDevTest.txt) are read from
    <datadir>/lfw/.
    """

    def __init__(self, datadir):
        """Datadir contains the unpacked contents of LFW from $URL -> /path/to/lfw"""
        self.lfwdir = datadir
        remkdir(os.path.join(self.lfwdir, 'lfw'))

        # The archive is fetched whenever <datadir>/lfw.tgz is not present
        if not os.path.exists(os.path.join(self.lfwdir, 'lfw.tgz')):
            self._download()
        super().__init__(self._dataset(), 'lfw')

    def _download(self, verbose=True):
        """Download and unpack the archive at URL into self.lfwdir, returning self"""
        vipy.downloader.download_and_unpack(URL, self.lfwdir, verbose=verbose)
        return self

    def subjects(self):
        """List of all subject names"""
        return [filetail(d) for d in dirlist(os.path.join(self.lfwdir, 'lfw'))]

    def subject_images(self, subject):
        """List of Images of a subject"""
        fnames = imlist(os.path.join(self.lfwdir, 'lfw', subject))
        return [ImageCategory(category=subject, filename=f) for f in fnames]

    def _dataset(self):
        """List of ImageCategory objects for every image of every subject, with category set to the subject name"""
        return [ImageCategory(category=s, filename=f) for s in self.subjects() for f in imlist(os.path.join(self.lfwdir, 'lfw', s))]

    def _parse_pairs(self, txtfile):
        """Parse a tab separated pairs file in <lfwdir>/lfw into a list of (ImageCategory, ImageCategory) tuples.

        Rows with three fields (subject, i, j) yield two images of the same subject.
        Rows with four fields (subject_a, i, subject_b, j) yield one image of each subject.
        Rows with any other number of fields are skipped.
        """
        def _image(subject, index):
            # Image filenames follow <subject>/<subject>_<4 digit zero padded index>.jpg
            return ImageCategory(category=subject, filename=os.path.join(self.lfwdir, 'lfw', subject, '%s_%04d.jpg' % (subject, int(index))))

        pairs = []
        for x in readcsv(os.path.join(self.lfwdir, 'lfw', txtfile), separator='\t'):
            if len(x) == 3:
                pairs.append((_image(x[0], x[1]), _image(x[0], x[2])))
            elif len(x) == 4:
                pairs.append((_image(x[0], x[1]), _image(x[2], x[3])))
            # NOTE(review): other row lengths are presumably header/count lines - confirm against the pairs file format
        return pairs

    def _pairsDevTest(self):
        """Pairs parsed from <lfwdir>/lfw/pairsDevTest.txt, raising ValueError if the file is missing"""
        pairsfile = os.path.join(self.lfwdir, 'lfw', 'pairsDevTest.txt')
        if not os.path.isfile(pairsfile):
            raise ValueError("Download and save text file to $datadir/lfw/pairsDevTest.txt with 'wget %s -O %s'" % (URL_PAIRS_DEV_TEST, pairsfile))
        return self._parse_pairs('pairsDevTest.txt')

    def _pairsDevTrain(self):
        """Pairs parsed from <lfwdir>/lfw/pairsDevTrain.txt, raising ValueError if the file is missing"""
        pairsfile = os.path.join(self.lfwdir, 'lfw', 'pairsDevTrain.txt')
        if not os.path.isfile(pairsfile):
            raise ValueError("Download and save text file to $datadir/lfw/pairsDevTrain.txt with 'wget %s -O %s'" % (URL_PAIRS_DEV_TRAIN, pairsfile))
        return self._parse_pairs('pairsDevTrain.txt')

    def _pairs(self):
        """Pairs parsed from <lfwdir>/lfw/pairs.txt, raising ValueError if the file is missing"""
        pairsfile = os.path.join(self.lfwdir, 'lfw', 'pairs.txt')
        if not os.path.isfile(pairsfile):
            raise ValueError("Download and save text file to $datadir/lfw/pairs.txt with 'wget %s -O %s'" % (URL_PAIRS_VIEW2, pairsfile))
        return self._parse_pairs('pairs.txt')

Classes

class LFW (datadir)

vipy.dataset.Dataset() class

Common class to manipulate large sets of vipy objects in parallel

D = vipy.dataset.Dataset([vipy.video.RandomScene(), vipy.video.RandomScene()], id='random_scene')
with vipy.globals.parallel(2):
    D = D.map(lambda v: v.frame(0))
list(D)

Create dataset and export as a directory of json files

D = vipy.dataset.Dataset([vipy.video.RandomScene(), vipy.video.RandomScene()])
D.tojsondir('/tmp/myjsondir')

Create dataset from all json or pkl files recursively discovered in a directory and lazy loaded

D = vipy.dataset.Dataset('/tmp/myjsondir')  # lazy loading

Create dataset from a list of json or pkl files and lazy loaded

D = vipy.dataset.Dataset(['/path/to/file1.json', '/path/to/file2.json'])  # lazy loading

Args

  • abspath [bool]: If true, load all lazy elements with absolute path
  • loader [lambda]: a callable loader that will process the object. This is useful for custom deserialization
  • lazy [bool]: If true, load all pkl or json files using the custom loader when accessed

Notes: Be warned that using the jsondir constructor will load elements on demand, but there are some methods that require loading the entire dataset into memory, and will happily try to do so

Datadir contains the unpacked contents of LFW from $URL -> /path/to/lfw

Expand source code Browse git
class LFW(vipy.dataset.Dataset):
    """LFW dataset exposed as a vipy Dataset of ImageCategory objects.

    Images are enumerated from <datadir>/lfw/<subject>/, and the optional pair
    files (pairs.txt, pairsDevTrain.txt, pairsDevTest.txt) are read from
    <datadir>/lfw/.
    """

    def __init__(self, datadir):
        """Datadir contains the unpacked contents of LFW from $URL -> /path/to/lfw"""
        self.lfwdir = datadir
        remkdir(os.path.join(self.lfwdir, 'lfw'))

        # The archive is fetched whenever <datadir>/lfw.tgz is not present
        if not os.path.exists(os.path.join(self.lfwdir, 'lfw.tgz')):
            self._download()
        super().__init__(self._dataset(), 'lfw')

    def _download(self, verbose=True):
        """Download and unpack the archive at URL into self.lfwdir, returning self"""
        vipy.downloader.download_and_unpack(URL, self.lfwdir, verbose=verbose)
        return self

    def subjects(self):
        """List of all subject names"""
        return [filetail(d) for d in dirlist(os.path.join(self.lfwdir, 'lfw'))]

    def subject_images(self, subject):
        """List of Images of a subject"""
        fnames = imlist(os.path.join(self.lfwdir, 'lfw', subject))
        return [ImageCategory(category=subject, filename=f) for f in fnames]

    def _dataset(self):
        """List of ImageCategory objects for every image of every subject, with category set to the subject name"""
        return [ImageCategory(category=s, filename=f) for s in self.subjects() for f in imlist(os.path.join(self.lfwdir, 'lfw', s))]

    def _parse_pairs(self, txtfile):
        """Parse a tab separated pairs file in <lfwdir>/lfw into a list of (ImageCategory, ImageCategory) tuples.

        Rows with three fields (subject, i, j) yield two images of the same subject.
        Rows with four fields (subject_a, i, subject_b, j) yield one image of each subject.
        Rows with any other number of fields are skipped.
        """
        def _image(subject, index):
            # Image filenames follow <subject>/<subject>_<4 digit zero padded index>.jpg
            return ImageCategory(category=subject, filename=os.path.join(self.lfwdir, 'lfw', subject, '%s_%04d.jpg' % (subject, int(index))))

        pairs = []
        for x in readcsv(os.path.join(self.lfwdir, 'lfw', txtfile), separator='\t'):
            if len(x) == 3:
                pairs.append((_image(x[0], x[1]), _image(x[0], x[2])))
            elif len(x) == 4:
                pairs.append((_image(x[0], x[1]), _image(x[2], x[3])))
            # NOTE(review): other row lengths are presumably header/count lines - confirm against the pairs file format
        return pairs

    def _pairsDevTest(self):
        """Pairs parsed from <lfwdir>/lfw/pairsDevTest.txt, raising ValueError if the file is missing"""
        pairsfile = os.path.join(self.lfwdir, 'lfw', 'pairsDevTest.txt')
        if not os.path.isfile(pairsfile):
            raise ValueError("Download and save text file to $datadir/lfw/pairsDevTest.txt with 'wget %s -O %s'" % (URL_PAIRS_DEV_TEST, pairsfile))
        return self._parse_pairs('pairsDevTest.txt')

    def _pairsDevTrain(self):
        """Pairs parsed from <lfwdir>/lfw/pairsDevTrain.txt, raising ValueError if the file is missing"""
        pairsfile = os.path.join(self.lfwdir, 'lfw', 'pairsDevTrain.txt')
        if not os.path.isfile(pairsfile):
            raise ValueError("Download and save text file to $datadir/lfw/pairsDevTrain.txt with 'wget %s -O %s'" % (URL_PAIRS_DEV_TRAIN, pairsfile))
        return self._parse_pairs('pairsDevTrain.txt')

    def _pairs(self):
        """Pairs parsed from <lfwdir>/lfw/pairs.txt, raising ValueError if the file is missing"""
        pairsfile = os.path.join(self.lfwdir, 'lfw', 'pairs.txt')
        if not os.path.isfile(pairsfile):
            raise ValueError("Download and save text file to $datadir/lfw/pairs.txt with 'wget %s -O %s'" % (URL_PAIRS_VIEW2, pairsfile))
        return self._parse_pairs('pairs.txt')

Ancestors

Methods

def subject_images(self, subject)

List of Images of a subject

Expand source code Browse git
def subject_images(self, subject):
    """Return one ImageCategory, labeled with the subject, for each file that imlist finds in <lfwdir>/lfw/<subject>"""
    subject_dir = os.path.join(self.lfwdir, 'lfw', subject)
    images = []
    for path in imlist(subject_dir):
        images.append(ImageCategory(category=subject, filename=path))
    return images
def subjects(self)

List of all subject names

Expand source code Browse git
def subjects(self):
    """Return the subject names, taken as the filetail of each entry that dirlist finds in <lfwdir>/lfw"""
    root = os.path.join(self.lfwdir, 'lfw')
    names = []
    for subject_dir in dirlist(root):
        names.append(filetail(subject_dir))
    return names

Inherited members