Module vipy.data.megaface

Expand source code Browse git
import os
from vipy.util import remkdir, dirlist, imlist, filebase, readcsv, writecsv
from vipy.image import ImageDetection
import json
import numpy as np


class MF2(object):
    """Access the 'Megaface_Challenge_1M_disjoint_LOOSE' images stored under ``datadir``."""

    def __init__(self, datadir='/proj/janus3/megaface'):
        """Store the dataset root directory.

        ``datadir`` is passed through ``vipy.util.remkdir``; presumably this
        creates the directory when it is missing -- confirm against vipy.util.
        """
        self.datadir = remkdir(datadir)

    def __repr__(self):
        return str('<vipy.data.megaface: "%s">' % self.datadir)

    def _trainset(self):
        """Save a csv file containing each image on a line for Megaface_Challenge_1M_disjoint_LOOSE.tar.gz

        Each row is ``(image file, base name of the directory that holds it)``.
        Returns whatever ``writecsv`` returns for the written file.
        """
        outfile = os.path.join(self.datadir, 'Megaface_Challenge_1M_disjoint_LOOSE.csv')
        subdir = os.path.join(self.datadir, 'Megaface_Challenge_1M_disjoint_LOOSE')
        D = dirlist(subdir)
        filelist = []
        for (k, d) in enumerate(D):
            print('[MF2.trainset][%d/%d]: creating image list for "%s"' % (k, len(D), d))
            filelist.extend((f, filebase(d)) for f in imlist(d))
        return writecsv(filelist, outfile)

    def tinyset(self, size=1000):
        """Return at most ``size`` image objects drawn at random from the trainset.

        The csv listing is generated with ``_trainset()`` when it does not exist
        yet. The image paths are shuffled with ``np.random.permutation`` and the
        leading ``size`` entries are wrapped in ``ImageDetection`` objects.
        Fewer objects are returned when the listing holds fewer than ``size``
        images; a non-positive ``size`` yields an empty list.
        """
        csvfile = os.path.join(self.datadir, 'Megaface_Challenge_1M_disjoint_LOOSE.csv')
        if not os.path.exists(csvfile):
            print('[MF2.tinyset]: generating csv file for MF2')
            self._trainset()

        imglist = np.random.permutation([f[0] for f in readcsv(csvfile)])
        outlist = []
        # Slice up front so exactly `size` images are imported; testing
        # `k > size` after appending let size + 2 images through.
        for (k, f) in enumerate(imglist[:max(size, 0)]):
            print('[MF2.tinyset][%d/%d]: importing "%s"' % (k, size, f))
            # NOTE(review): the second csv column (directory base name) is not
            # used; the category comes from the image file's own base name --
            # confirm this is intended.
            outlist.append(ImageDetection(filename=os.path.join(self.datadir, f), category=filebase(f)))
        return outlist


class Megaface(object):
    """Access the images listed under ``datadir``/FlickrFinal2 and their JSON sidecar files."""

    def __init__(self, datadir):
        """Store the dataset root directory (used as given, not created)."""
        self.datadir = datadir

    def __repr__(self):
        return str('<vipy.data.megaface: %s>' % self.datadir)

    def _attributes(self, imgfile):
        """Return the parsed contents of the ``<imgfile>.json`` sidecar file."""
        # Context manager closes the handle promptly instead of leaking it.
        with open(imgfile + '.json', 'r') as f:
            return json.load(f)

    def _imagelist(self):
        """Save a csv file containing each image on a line

        Returns the path of ``megaface.csv`` inside ``datadir``. An existing
        file is reused as-is; otherwise it is built by walking two directory
        levels below ``FlickrFinal2`` and writing one image path per line.
        """
        outfile = os.path.join(self.datadir, 'megaface.csv')
        if os.path.exists(outfile):
            return outfile

        # NOTE(review): an interrupted run leaves a partial megaface.csv that
        # later calls will reuse without rebuilding.
        with open(outfile, 'w') as listing:
            subdir = os.path.join(self.datadir, 'FlickrFinal2')
            D = dirlist(subdir)
            for (k, d) in enumerate(D):
                print('[megaface.dataset][%d/%d]: creating image list for "%s"' % (k, len(D), d))
                for sd in dirlist(d):
                    for f in imlist(sd):
                        listing.write(f + '\n')  # full path
        return outfile

    def tinyset(self, size=1000):
        """Return at most ``size`` image objects drawn at random from the dataset.

        Image paths from ``_imagelist()`` are shuffled with
        ``np.random.permutation``; the leading ``size`` entries become
        ``ImageDetection`` objects whose bounding box is read from the
        ``bounding_box`` entry (``x``, ``y``, ``width``, ``height``) of each
        image's JSON sidecar. A non-positive ``size`` yields an empty list.
        """
        outlist = []
        imglist = np.random.permutation([f[0] for f in readcsv(self._imagelist())])
        # Slice up front so exactly `size` images are imported; testing
        # `k > size` after appending let size + 2 images through.
        for (k, f) in enumerate(imglist[:max(size, 0)]):
            print('[megaface.dataset][%d/%d]: importing "%s"' % (k, size, f))
            imgfile = os.path.join(self.datadir, f)
            box = self._attributes(imgfile)['bounding_box']
            outlist.append(ImageDetection(filename=imgfile, category=filebase(f)).boundingbox(xmin=box['x'], ymin=box['y'], width=box['width'], height=box['height']))
        return outlist

Classes

class MF2 (datadir='/proj/janus3/megaface')
Expand source code Browse git
class MF2(object):
    """Access the 'Megaface_Challenge_1M_disjoint_LOOSE' images stored under ``datadir``."""

    def __init__(self, datadir='/proj/janus3/megaface'):
        """Store the dataset root directory.

        ``datadir`` is passed through ``vipy.util.remkdir``; presumably this
        creates the directory when it is missing -- confirm against vipy.util.
        """
        self.datadir = remkdir(datadir)

    def __repr__(self):
        return str('<vipy.data.megaface: "%s">' % self.datadir)

    def _trainset(self):
        """Save a csv file containing each image on a line for Megaface_Challenge_1M_disjoint_LOOSE.tar.gz

        Each row is ``(image file, base name of the directory that holds it)``.
        Returns whatever ``writecsv`` returns for the written file.
        """
        outfile = os.path.join(self.datadir, 'Megaface_Challenge_1M_disjoint_LOOSE.csv')
        subdir = os.path.join(self.datadir, 'Megaface_Challenge_1M_disjoint_LOOSE')
        D = dirlist(subdir)
        filelist = []
        for (k, d) in enumerate(D):
            print('[MF2.trainset][%d/%d]: creating image list for "%s"' % (k, len(D), d))
            filelist.extend((f, filebase(d)) for f in imlist(d))
        return writecsv(filelist, outfile)

    def tinyset(self, size=1000):
        """Return at most ``size`` image objects drawn at random from the trainset.

        The csv listing is generated with ``_trainset()`` when it does not exist
        yet. The image paths are shuffled with ``np.random.permutation`` and the
        leading ``size`` entries are wrapped in ``ImageDetection`` objects.
        Fewer objects are returned when the listing holds fewer than ``size``
        images; a non-positive ``size`` yields an empty list.
        """
        csvfile = os.path.join(self.datadir, 'Megaface_Challenge_1M_disjoint_LOOSE.csv')
        if not os.path.exists(csvfile):
            print('[MF2.tinyset]: generating csv file for MF2')
            self._trainset()

        imglist = np.random.permutation([f[0] for f in readcsv(csvfile)])
        outlist = []
        # Slice up front so exactly `size` images are imported; testing
        # `k > size` after appending let size + 2 images through.
        for (k, f) in enumerate(imglist[:max(size, 0)]):
            print('[MF2.tinyset][%d/%d]: importing "%s"' % (k, size, f))
            # NOTE(review): the second csv column (directory base name) is not
            # used; the category comes from the image file's own base name --
            # confirm this is intended.
            outlist.append(ImageDetection(filename=os.path.join(self.datadir, f), category=filebase(f)))
        return outlist

Methods

def tinyset(self, size=1000)

Return approximately (size) image objects, drawn in random order from the trainset

Expand source code Browse git
def tinyset(self, size=1000):
    """Return at most ``size`` image objects drawn at random from the trainset.

    The csv listing is generated with ``_trainset()`` when it does not exist
    yet. The image paths are shuffled with ``np.random.permutation`` and the
    leading ``size`` entries are wrapped in ``ImageDetection`` objects.
    Fewer objects are returned when the listing holds fewer than ``size``
    images; a non-positive ``size`` yields an empty list.
    """
    csvfile = os.path.join(self.datadir, 'Megaface_Challenge_1M_disjoint_LOOSE.csv')
    if not os.path.exists(csvfile):
        print('[MF2.tinyset]: generating csv file for MF2')
        self._trainset()

    imglist = np.random.permutation([f[0] for f in readcsv(csvfile)])
    outlist = []
    # Slice up front so exactly `size` images are imported; testing
    # `k > size` after appending let size + 2 images through.
    for (k, f) in enumerate(imglist[:max(size, 0)]):
        print('[MF2.tinyset][%d/%d]: importing "%s"' % (k, size, f))
        outlist.append(ImageDetection(filename=os.path.join(self.datadir, f), category=filebase(f)))
    return outlist
class Megaface (datadir)
Expand source code Browse git
class Megaface(object):
    """Access the images listed under ``datadir``/FlickrFinal2 and their JSON sidecar files."""

    def __init__(self, datadir):
        """Store the dataset root directory (used as given, not created)."""
        self.datadir = datadir

    def __repr__(self):
        return str('<vipy.data.megaface: %s>' % self.datadir)

    def _attributes(self, imgfile):
        """Return the parsed contents of the ``<imgfile>.json`` sidecar file."""
        # Context manager closes the handle promptly instead of leaking it.
        with open(imgfile + '.json', 'r') as f:
            return json.load(f)

    def _imagelist(self):
        """Save a csv file containing each image on a line

        Returns the path of ``megaface.csv`` inside ``datadir``. An existing
        file is reused as-is; otherwise it is built by walking two directory
        levels below ``FlickrFinal2`` and writing one image path per line.
        """
        outfile = os.path.join(self.datadir, 'megaface.csv')
        if os.path.exists(outfile):
            return outfile

        # NOTE(review): an interrupted run leaves a partial megaface.csv that
        # later calls will reuse without rebuilding.
        with open(outfile, 'w') as listing:
            subdir = os.path.join(self.datadir, 'FlickrFinal2')
            D = dirlist(subdir)
            for (k, d) in enumerate(D):
                print('[megaface.dataset][%d/%d]: creating image list for "%s"' % (k, len(D), d))
                for sd in dirlist(d):
                    for f in imlist(sd):
                        listing.write(f + '\n')  # full path
        return outfile

    def tinyset(self, size=1000):
        """Return at most ``size`` image objects drawn at random from the dataset.

        Image paths from ``_imagelist()`` are shuffled with
        ``np.random.permutation``; the leading ``size`` entries become
        ``ImageDetection`` objects whose bounding box is read from the
        ``bounding_box`` entry (``x``, ``y``, ``width``, ``height``) of each
        image's JSON sidecar. A non-positive ``size`` yields an empty list.
        """
        outlist = []
        imglist = np.random.permutation([f[0] for f in readcsv(self._imagelist())])
        # Slice up front so exactly `size` images are imported; testing
        # `k > size` after appending let size + 2 images through.
        for (k, f) in enumerate(imglist[:max(size, 0)]):
            print('[megaface.dataset][%d/%d]: importing "%s"' % (k, size, f))
            imgfile = os.path.join(self.datadir, f)
            box = self._attributes(imgfile)['bounding_box']
            outlist.append(ImageDetection(filename=imgfile, category=filebase(f)).boundingbox(xmin=box['x'], ymin=box['y'], width=box['width'], height=box['height']))
        return outlist

Methods

def tinyset(self, size=1000)

Return approximately (size) image objects, drawn in random order from the dataset

Expand source code Browse git
def tinyset(self, size=1000):
    """Return at most ``size`` image objects drawn at random from the dataset.

    Image paths from ``_imagelist()`` are shuffled with
    ``np.random.permutation``; the leading ``size`` entries become
    ``ImageDetection`` objects whose bounding box is read from the
    ``bounding_box`` entry (``x``, ``y``, ``width``, ``height``) of each
    image's JSON sidecar. A non-positive ``size`` yields an empty list.
    """
    outlist = []
    imglist = np.random.permutation([f[0] for f in readcsv(self._imagelist())])
    # Slice up front so exactly `size` images are imported; testing
    # `k > size` after appending let size + 2 images through.
    for (k, f) in enumerate(imglist[:max(size, 0)]):
        print('[megaface.dataset][%d/%d]: importing "%s"' % (k, size, f))
        imgfile = os.path.join(self.datadir, f)
        box = self._attributes(imgfile)['bounding_box']
        outlist.append(ImageDetection(filename=imgfile, category=filebase(f)).boundingbox(xmin=box['x'], ymin=box['y'], width=box['width'], height=box['height']))
    return outlist