Module vipy.data.inaturalist
Expand source code Browse git
import os
import vipy
# Download locations and expected MD5 digests for the iNaturalist 2021
# competition archives.  Each URL is paired with the digest that is passed as
# the `md5=` argument when the archive is downloaded and unpacked.

# Training split: image archive and annotation archive.
TRAIN_IMG_2021_URL = 'https://ml-inat-competition-datasets.s3.amazonaws.com/2021/train.tar.gz'
TRAIN_IMG_2021_MD5 = 'e0526d53c7f7b2e3167b2b43bb2690ed'
TRAIN_ANNO_2021_URL = 'https://ml-inat-competition-datasets.s3.amazonaws.com/2021/train.json.tar.gz'
TRAIN_ANNO_2021_MD5 = '38a7bb733f7a09214d44293460ec0021'
# Validation split: image archive and annotation archive.
VAL_IMG_2021_URL = 'https://ml-inat-competition-datasets.s3.amazonaws.com/2021/val.tar.gz'
VAL_IMG_2021_MD5 = 'f6f6e0e242e3d4c9569ba56400938afc'
VAL_ANNO_2021_URL = 'https://ml-inat-competition-datasets.s3.amazonaws.com/2021/val.json.tar.gz'
VAL_ANNO_2021_MD5 = '4d761e0f6a86cc63e8f7afc91f6a8f0b'
class iNaturalist2021(vipy.dataset.Dataset):
    """iNaturalist 2021 competition split exposed as a vipy dataset.

    Project: https://github.com/visipedia/inat_comp/tree/master/2021

    Construction fetches the image and annotation archives into `datadir`
    when the archive files are not already there, reads the annotation JSON,
    and builds one `vipy.image.ImageCategory` per entry of its `images` list.
    """

    def __init__(self, datadir, imageurl=TRAIN_IMG_2021_URL, imagemd5=TRAIN_IMG_2021_MD5, annourl=TRAIN_ANNO_2021_URL, annomd5=TRAIN_ANNO_2021_MD5, name='inaturalist_train'):
        """Download the split if needed and index its annotations.

        Args:
            datadir: Directory holding the archives and their unpacked contents.
            imageurl: URL of the image archive.
            imagemd5: MD5 digest forwarded to the downloader for the image archive.
            annourl: URL of the annotation archive; its file name is expected
                to end in ``.tar.gz``.
            annomd5: MD5 digest forwarded to the downloader for the annotation archive.
            name: Accepted but not referenced by this constructor.
                NOTE(review): the dataset id below is always 'iNaturalist2021',
                so train and val instances share one id - confirm whether
                `name` was meant to be used as the id.
        """
        self._datadir = vipy.util.remkdir(datadir)

        # The presence of the archive file itself is the "already downloaded" marker.
        image_archive = os.path.join(self._datadir, vipy.util.filetail(imageurl))
        if not os.path.exists(image_archive):
            vipy.downloader.download_and_unpack(imageurl, self._datadir, md5=imagemd5)

        anno_archive_name = vipy.util.filetail(annourl)
        if not os.path.exists(os.path.join(self._datadir, anno_archive_name)):
            vipy.downloader.download_and_unpack(annourl, self._datadir, md5=annomd5)

        # Drop the trailing archive extension (val.json.tar.gz -> val.json).
        anno_json_name = anno_archive_name[:-len('.tar.gz')]
        anno = vipy.util.readjson(os.path.join(self._datadir, anno_json_name))

        # image id -> image path under the data directory
        filename_by_imageid = {}
        for image in anno['images']:
            filename_by_imageid[image['id']] = os.path.join(self._datadir, image['file_name'])

        # image id -> first annotation of its group (one annotation per image)
        grouped = vipy.util.groupbyasdict(anno['annotations'], lambda x: x['image_id'])
        annotation_by_imageid = {imageid: group[0] for (imageid, group) in grouped.items()}

        # category id -> category name
        category_by_id = {}
        for cat in anno['categories']:
            category_by_id[cat['id']] = cat['name']

        imlist = []
        for (imageid, filename) in filename_by_imageid.items():
            if imageid in annotation_by_imageid:
                category = category_by_id[annotation_by_imageid[imageid]['category_id']]
                attributes = {'category_id': annotation_by_imageid[imageid]['category_id']}
            else:
                # Images without an annotation are kept, just unlabeled.
                category = None
                attributes = None
            imlist.append(vipy.image.ImageCategory(filename=filename, category=category, attributes=attributes))

        super().__init__(imlist, id='iNaturalist2021')

    def trainset(self):
        """Return this dataset object itself."""
        return self

    def valset(self):
        """Return a new dataset built from the validation archives in the same data directory."""
        return iNaturalist2021(
            self._datadir,
            imageurl=VAL_IMG_2021_URL,
            imagemd5=VAL_IMG_2021_MD5,
            annourl=VAL_ANNO_2021_URL,
            annomd5=VAL_ANNO_2021_MD5,
            name='inaturalist_val',
        )
Classes
class iNaturalist2021 (datadir, imageurl='https://ml-inat-competition-datasets.s3.amazonaws.com/2021/train.tar.gz', imagemd5='e0526d53c7f7b2e3167b2b43bb2690ed', annourl='https://ml-inat-competition-datasets.s3.amazonaws.com/2021/train.json.tar.gz', annomd5='38a7bb733f7a09214d44293460ec0021', name='inaturalist_train')
-
Expand source code Browse git
class iNaturalist2021(vipy.dataset.Dataset):
    """iNaturalist 2021 competition split exposed as a vipy dataset.

    Project: https://github.com/visipedia/inat_comp/tree/master/2021

    Construction fetches the image and annotation archives into `datadir`
    when the archive files are not already there, reads the annotation JSON,
    and builds one `vipy.image.ImageCategory` per entry of its `images` list.
    """

    def __init__(self, datadir, imageurl=TRAIN_IMG_2021_URL, imagemd5=TRAIN_IMG_2021_MD5, annourl=TRAIN_ANNO_2021_URL, annomd5=TRAIN_ANNO_2021_MD5, name='inaturalist_train'):
        """Download the split if needed and index its annotations.

        Args:
            datadir: Directory holding the archives and their unpacked contents.
            imageurl: URL of the image archive.
            imagemd5: MD5 digest forwarded to the downloader for the image archive.
            annourl: URL of the annotation archive; its file name is expected
                to end in ``.tar.gz``.
            annomd5: MD5 digest forwarded to the downloader for the annotation archive.
            name: Accepted but not referenced by this constructor.
                NOTE(review): the dataset id below is always 'iNaturalist2021',
                so train and val instances share one id - confirm whether
                `name` was meant to be used as the id.
        """
        self._datadir = vipy.util.remkdir(datadir)

        # The presence of the archive file itself is the "already downloaded" marker.
        image_archive = os.path.join(self._datadir, vipy.util.filetail(imageurl))
        if not os.path.exists(image_archive):
            vipy.downloader.download_and_unpack(imageurl, self._datadir, md5=imagemd5)

        anno_archive_name = vipy.util.filetail(annourl)
        if not os.path.exists(os.path.join(self._datadir, anno_archive_name)):
            vipy.downloader.download_and_unpack(annourl, self._datadir, md5=annomd5)

        # Drop the trailing archive extension (val.json.tar.gz -> val.json).
        anno_json_name = anno_archive_name[:-len('.tar.gz')]
        anno = vipy.util.readjson(os.path.join(self._datadir, anno_json_name))

        # image id -> image path under the data directory
        filename_by_imageid = {}
        for image in anno['images']:
            filename_by_imageid[image['id']] = os.path.join(self._datadir, image['file_name'])

        # image id -> first annotation of its group (one annotation per image)
        grouped = vipy.util.groupbyasdict(anno['annotations'], lambda x: x['image_id'])
        annotation_by_imageid = {imageid: group[0] for (imageid, group) in grouped.items()}

        # category id -> category name
        category_by_id = {}
        for cat in anno['categories']:
            category_by_id[cat['id']] = cat['name']

        imlist = []
        for (imageid, filename) in filename_by_imageid.items():
            if imageid in annotation_by_imageid:
                category = category_by_id[annotation_by_imageid[imageid]['category_id']]
                attributes = {'category_id': annotation_by_imageid[imageid]['category_id']}
            else:
                # Images without an annotation are kept, just unlabeled.
                category = None
                attributes = None
            imlist.append(vipy.image.ImageCategory(filename=filename, category=category, attributes=attributes))

        super().__init__(imlist, id='iNaturalist2021')

    def trainset(self):
        """Return this dataset object itself."""
        return self

    def valset(self):
        """Return a new dataset built from the validation archives in the same data directory."""
        return iNaturalist2021(
            self._datadir,
            imageurl=VAL_IMG_2021_URL,
            imagemd5=VAL_IMG_2021_MD5,
            annourl=VAL_ANNO_2021_URL,
            annomd5=VAL_ANNO_2021_MD5,
            name='inaturalist_val',
        )
Ancestors
Methods
def trainset(self)
-
Expand source code Browse git
def trainset(self):
    """Return this dataset object itself."""
    return self
def valset(self)
-
Expand source code Browse git
def valset(self):
    """Return a new dataset built from the validation archives in the same data directory."""
    return iNaturalist2021(
        self._datadir,
        imageurl=VAL_IMG_2021_URL,
        imagemd5=VAL_IMG_2021_MD5,
        annourl=VAL_ANNO_2021_URL,
        annomd5=VAL_ANNO_2021_MD5,
        name='inaturalist_val',
    )
Inherited members
Dataset:
archive
categories
chunk
class_to_index
classes
classlist
clone
count
countby
density
duration_in_seconds
filter
flatten
id
index_to_class
inverse_frequency_weight
istype
jsondir
label_to_index
list
load
map
merge
minibatch
multilabel_inverse_frequency_weight
num_categories
num_classes
num_labels
percentage
replace
save
set
shuffle
shuffler
sort
split
split_by_videoid
synonym
take
take_per_category
takefilter
takelist
takeone
to_torch
to_torch_tensordir
tohtml
tojsondir
tolist
video_duration_in_seconds
video_montage
zip