Module vipy.data.cc12m
Expand source code Browse git
import os
import vipy
from vipy.util import tocache, readcsv
from vipy.dataset import Dataset
class CC12M(Dataset):
    """Dataset wrapper for the Conceptual 12M (CC12M) index file.

    https://github.com/google-research-datasets/conceptual-12m

    The index file ``cc12m.tsv`` is read with ``readcsv`` using a tab
    separator.  Each element ``r`` of the result is converted on load to a
    ``vipy.image.ImageCategory`` with ``url=r[0]`` and ``category=r[1]``.

    Args:
        datadir: Directory that contains ``cc12m.tsv``.  When ``None``, the
            directory returned by ``tocache('cc12m')`` is used instead.

    Raises:
        AssertionError: If ``cc12m.tsv`` does not exist in the directory.
            The message points at the download location.
    """

    def __init__(self, datadir=None):
        outdir = tocache('cc12m') if datadir is None else datadir
        csvfile = os.path.join(outdir, 'cc12m.tsv')

        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; the exception type is kept as
        # AssertionError so existing callers see the same failure.
        if not os.path.exists(csvfile):
            raise AssertionError("download from https://github.com/google-research-datasets/conceptual-12m")

        csv = readcsv(csvfile, separator='\t')
        super().__init__(csv, loader=lambda r: vipy.image.ImageCategory(url=r[0], category=r[1]))
Classes
class CC12M (datadir=None)
-
Expand source code Browse git
class CC12M(Dataset):
    """Dataset wrapper for the Conceptual 12M (CC12M) index file.

    https://github.com/google-research-datasets/conceptual-12m

    The index file ``cc12m.tsv`` is read with ``readcsv`` using a tab
    separator.  Each element ``r`` of the result is converted on load to a
    ``vipy.image.ImageCategory`` with ``url=r[0]`` and ``category=r[1]``.

    Args:
        datadir: Directory that contains ``cc12m.tsv``.  When ``None``, the
            directory returned by ``tocache('cc12m')`` is used instead.

    Raises:
        AssertionError: If ``cc12m.tsv`` does not exist in the directory.
            The message points at the download location.
    """

    def __init__(self, datadir=None):
        outdir = tocache('cc12m') if datadir is None else datadir
        csvfile = os.path.join(outdir, 'cc12m.tsv')

        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; the exception type is kept as
        # AssertionError so existing callers see the same failure.
        if not os.path.exists(csvfile):
            raise AssertionError("download from https://github.com/google-research-datasets/conceptual-12m")

        csv = readcsv(csvfile, separator='\t')
        super().__init__(csv, loader=lambda r: vipy.image.ImageCategory(url=r[0], category=r[1]))
Ancestors
Inherited members
Dataset:
balanced
batch
chunk
chunks
clone
count
even_split
filter
frequency
from_directory
from_image_urls
groupby
id
identity_shuffler
index
inverse_frequency
list
load
localmap
map
minibatch
partition
pipeline
raw
repeat
sample
set
shift
shuffle
slice
sort
split
streaming_map
streaming_shuffler
take
take_fraction
takeby
takelist
takeone
truncate
tuple
uniform_shuffler
zip