Module vipy.torch
Expand source code Browse git
import vipy
import numpy as np
import copy
import os
import random
import dill
import time
import json
import vipy.util
vipy.util.try_import('torch');
import torch
import torch.utils.data
from torch.utils.data import DataLoader, random_split
class TorchDataset(torch.utils.data.Dataset):
    """Adapter that exposes a `vipy.dataset.Dataset` (e.g. a pycollector dataset) as a torch dataset.

    Each element of the wrapped dataset is passed through `f_transformer` when it is accessed.

    Args:
        f_transformer: callable applied to a dataset element; its return value is what indexing returns.
        d: the `vipy.dataset.Dataset` to wrap.
    """

    def __init__(self, f_transformer, d):
        import vipy.dataset

        assert isinstance(d, vipy.dataset.Dataset), "Invalid input"
        assert callable(f_transformer), "Invalid input"

        # The transformer is kept as dill bytes so that lambda functions survive torch serialization
        self._f_transformer = dill.dumps(f_transformer)
        self.dataset = d

    def _unpack(self):
        """Restore the transformer from its dill bytes if it is still packed, then return self."""
        packed = self._f_transformer
        if isinstance(packed, bytes):
            self._f_transformer = dill.loads(packed)
        return self

    def __iter__(self):
        """Yield the transformed elements in index order."""
        yield from map(self.__getitem__, range(len(self)))

    def __getitem__(self, k):
        """Return the transformed element k; the transformer should return tuple(tensor, index)"""
        transform = self._unpack()._f_transformer
        return transform(self.dataset[k])

    def __len__(self):
        """Number of elements in the wrapped dataset."""
        return len(self.dataset)
class Tensordir(torch.utils.data.Dataset):
    """A torch dataset stored as a directory of .pkl.bz2 files each containing a list of [(tensor, str=json.dumps(label)), ...] tuples used for data augmented training.

    This is useful to use the default Dataset loaders in Torch.

    Usage:

    ```python
    vipy.torch.Tensordir('/path/to')
    vipy.torch.Tensordir( ('/path/to/1', '/path/to/2') )
    ```

    Args:
        tensordir: a directory path, or an iterable of directory paths, searched for '.pkl.bz2' files.
        verbose: if true, print a warning when a file could not be loaded after all retries.
        reseed: if true, call `random.seed()` on every item access to force randomness after fork().
        take: accepted by the constructor but not read by it.  NOTE(review): call `take(n)` explicitly to subsample.
        mutator: optional callable applied to the decoded JSON label; its result is re-encoded with `json.dumps`.

    .. note:: This requires python random() and not numpy random
    """

    def __init__(self, tensordir, verbose=True, reseed=True, take=None, mutator=None):
        assert (isinstance(tensordir, str) and os.path.isdir(tensordir)) or all([os.path.isdir(d) for d in tensordir])
        assert mutator is None or callable(mutator)

        self._dirlist = [s for d in vipy.util.tolist(tensordir) for s in vipy.util.extlist(d, '.pkl.bz2')]
        self._verbose = verbose
        self._reseed = reseed
        self._mutator = mutator

    def __getitem__(self, k):
        """Return one randomly chosen (tensor, json_label) tuple from file k.

        Loading is attempted three times with a one second pause between attempts.  If all attempts fail,
        a different file is chosen at random and returned instead.

        Raises:
            AssertionError: if k is outside [0, len(self)).
        """
        if self._reseed:
            random.seed()  # force randomness after fork()

        assert k >= 0 and k < len(self._dirlist)
        for _ in range(0, 3):
            try:
                obj = vipy.util.bz2pkl(self._dirlist[k])  # load me
                assert len(obj) > 0, "Invalid augmentation"
                (t, lbl) = obj[random.randint(0, len(obj)-1)]  # choose one tensor at random
                label = json.loads(lbl)  # decode once, shared by the validity check and the mutator
                assert t is not None and label is not None, "Invalid augmentation"  # get another one if the augmentation was invalid
                return (t, lbl if self._mutator is None else json.dumps(self._mutator(label)))
            except Exception:
                # Only ordinary errors trigger a retry; KeyboardInterrupt and SystemExit must propagate.
                time.sleep(1)  # try again after a bit if another process is augmenting this .pkl.bz2 in parallel

        if self._verbose:
            print('[vipy.dataset.TorchTensordir][WARNING]: %s corrupted or invalid' % self._dirlist[k])

        # NOTE: this fallback recurses without a bound if every file is unreadable
        return self.__getitem__(random.randint(0, len(self)-1))  # maximum retries reached, get another one

    def __len__(self):
        """Number of .pkl.bz2 files in this dataset."""
        return len(self._dirlist)

    def take(self, n):
        """Keep at most n files chosen by a random permutation (numpy global RNG), in place.  Returns self."""
        self._dirlist = [self._dirlist[k] for k in np.random.permutation(range(len(self._dirlist)))[0:n]]
        return self

    def filter(self, f):
        """Keep elements that lambda evaluates true.  The lambda operates on the *absolute path filename* for the tensordir and not the contents.  This is useful for filtering by instanceid in the `vipy.util.filebase`."""
        assert callable(f)
        self._dirlist = [x for x in self._dirlist if f(x)]
        return self

    def clone(self):
        """Return a deep copy of this dataset."""
        return copy.deepcopy(self)
class TorchTensordir(Tensordir):
    """Alias of `Tensordir`, kept for backwards compatibility."""
Classes
class Tensordir (tensordir, verbose=True, reseed=True, take=None, mutator=None)
-
A torch dataset stored as a directory of .pkl.bz2 files each containing a list of [(tensor, str=json.dumps(label)), …] tuples used for data augmented training.
This is useful to use the default Dataset loaders in Torch.
Usage:
vipy.torch.Tensordir('/path/to')
vipy.torch.Tensordir( ('/path/to/1', '/path/to/2') )
Note: This requires python random() and not numpy random
Expand source code Browse git
class Tensordir(torch.utils.data.Dataset):
    """A torch dataset stored as a directory of .pkl.bz2 files each containing a list of [(tensor, str=json.dumps(label)), ...] tuples used for data augmented training.

    This is useful to use the default Dataset loaders in Torch.

    Usage:

    ```python
    vipy.torch.Tensordir('/path/to')
    vipy.torch.Tensordir( ('/path/to/1', '/path/to/2') )
    ```

    Args:
        tensordir: a directory path, or an iterable of directory paths, searched for '.pkl.bz2' files.
        verbose: if true, print a warning when a file could not be loaded after all retries.
        reseed: if true, call `random.seed()` on every item access to force randomness after fork().
        take: accepted by the constructor but not read by it.  NOTE(review): call `take(n)` explicitly to subsample.
        mutator: optional callable applied to the decoded JSON label; its result is re-encoded with `json.dumps`.

    .. note:: This requires python random() and not numpy random
    """

    def __init__(self, tensordir, verbose=True, reseed=True, take=None, mutator=None):
        assert (isinstance(tensordir, str) and os.path.isdir(tensordir)) or all([os.path.isdir(d) for d in tensordir])
        assert mutator is None or callable(mutator)

        self._dirlist = [s for d in vipy.util.tolist(tensordir) for s in vipy.util.extlist(d, '.pkl.bz2')]
        self._verbose = verbose
        self._reseed = reseed
        self._mutator = mutator

    def __getitem__(self, k):
        """Return one randomly chosen (tensor, json_label) tuple from file k.

        Loading is attempted three times with a one second pause between attempts.  If all attempts fail,
        a different file is chosen at random and returned instead.

        Raises:
            AssertionError: if k is outside [0, len(self)).
        """
        if self._reseed:
            random.seed()  # force randomness after fork()

        assert k >= 0 and k < len(self._dirlist)
        for _ in range(0, 3):
            try:
                obj = vipy.util.bz2pkl(self._dirlist[k])  # load me
                assert len(obj) > 0, "Invalid augmentation"
                (t, lbl) = obj[random.randint(0, len(obj)-1)]  # choose one tensor at random
                label = json.loads(lbl)  # decode once, shared by the validity check and the mutator
                assert t is not None and label is not None, "Invalid augmentation"  # get another one if the augmentation was invalid
                return (t, lbl if self._mutator is None else json.dumps(self._mutator(label)))
            except Exception:
                # Only ordinary errors trigger a retry; KeyboardInterrupt and SystemExit must propagate.
                time.sleep(1)  # try again after a bit if another process is augmenting this .pkl.bz2 in parallel

        if self._verbose:
            print('[vipy.dataset.TorchTensordir][WARNING]: %s corrupted or invalid' % self._dirlist[k])

        # NOTE: this fallback recurses without a bound if every file is unreadable
        return self.__getitem__(random.randint(0, len(self)-1))  # maximum retries reached, get another one

    def __len__(self):
        """Number of .pkl.bz2 files in this dataset."""
        return len(self._dirlist)

    def take(self, n):
        """Keep at most n files chosen by a random permutation (numpy global RNG), in place.  Returns self."""
        self._dirlist = [self._dirlist[k] for k in np.random.permutation(range(len(self._dirlist)))[0:n]]
        return self

    def filter(self, f):
        """Keep elements that lambda evaluates true.  The lambda operates on the *absolute path filename* for the tensordir and not the contents.  This is useful for filtering by instanceid in the `vipy.util.filebase`."""
        assert callable(f)
        self._dirlist = [x for x in self._dirlist if f(x)]
        return self

    def clone(self):
        """Return a deep copy of this dataset."""
        return copy.deepcopy(self)
Ancestors
- torch.utils.data.dataset.Dataset
- typing.Generic
Subclasses
Methods
def clone(self)
-
Expand source code Browse git
def clone(self):
    """Return a deep copy of this object."""
    duplicate = copy.deepcopy(self)
    return duplicate
def filter(self, f)
-
Keep the elements for which the lambda evaluates true. The lambda operates on the absolute path filename for the tensordir and not the contents. This is useful for filtering by instanceid in the `vipy.util.filebase()`.
Expand source code Browse git
def filter(self, f):
    """Keep elements that lambda evaluates true.

    The lambda operates on the *absolute path filename* for the tensordir and not the contents.
    This is useful for filtering by instanceid in the `vipy.util.filebase`.  Returns self.
    """
    assert callable(f)
    kept = []
    for path in self._dirlist:
        if f(path):
            kept.append(path)
    self._dirlist = kept
    return self
def take(self, n)
-
Expand source code Browse git
def take(self, n):
    """Keep at most n entries chosen by a random permutation (numpy global RNG), in place.  Returns self."""
    order = np.random.permutation(range(len(self._dirlist)))
    chosen = order[0:n]
    self._dirlist = [self._dirlist[i] for i in chosen]
    return self
class TorchDataset (f_transformer, d)
-
Converter from a pycollector dataset to a torch dataset
Expand source code Browse git
class TorchDataset(torch.utils.data.Dataset):
    """Adapter that exposes a `vipy.dataset.Dataset` (e.g. a pycollector dataset) as a torch dataset.

    Each element of the wrapped dataset is passed through `f_transformer` when it is accessed.

    Args:
        f_transformer: callable applied to a dataset element; its return value is what indexing returns.
        d: the `vipy.dataset.Dataset` to wrap.
    """

    def __init__(self, f_transformer, d):
        import vipy.dataset

        assert isinstance(d, vipy.dataset.Dataset), "Invalid input"
        assert callable(f_transformer), "Invalid input"

        # The transformer is kept as dill bytes so that lambda functions survive torch serialization
        self._f_transformer = dill.dumps(f_transformer)
        self.dataset = d

    def _unpack(self):
        """Restore the transformer from its dill bytes if it is still packed, then return self."""
        packed = self._f_transformer
        if isinstance(packed, bytes):
            self._f_transformer = dill.loads(packed)
        return self

    def __iter__(self):
        """Yield the transformed elements in index order."""
        yield from map(self.__getitem__, range(len(self)))

    def __getitem__(self, k):
        """Return the transformed element k; the transformer should return tuple(tensor, index)"""
        transform = self._unpack()._f_transformer
        return transform(self.dataset[k])

    def __len__(self):
        """Number of elements in the wrapped dataset."""
        return len(self.dataset)
Ancestors
- torch.utils.data.dataset.Dataset
- typing.Generic
class TorchTensordir (tensordir, verbose=True, reseed=True, take=None, mutator=None)
-
A torch dataset stored as a directory of .pkl.bz2 files each containing a list of [(tensor, str=json.dumps(label)), …] tuples used for data augmented training.
This is useful to use the default Dataset loaders in Torch.
Usage:
vipy.torch.Tensordir('/path/to')
vipy.torch.Tensordir( ('/path/to/1', '/path/to/2') )
Note: This requires python random() and not numpy random
Expand source code Browse git
class TorchTensordir(Tensordir):
    """Alias of `Tensordir`, kept for backwards compatibility."""
Ancestors
- Tensordir
- torch.utils.data.dataset.Dataset
- typing.Generic
Inherited members